xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision ffad99011bdf8bdff5e8540ef3c49b4fd8d6e6bb)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local solvers ksp_D and ksp_R */
22   /* PCBDDCSetUpLocalScatters should be called first! */
23   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
24 
25   /* Change global null space passed in by the user if change of basis has been requested */
26   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
27     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
28   }
29 
30   /*
31      Setup local correction and local part of coarse basis.
32      Gives back the dense local part of the coarse matrix in column major ordering
33   */
34   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
35 
36   /* Compute total number of coarse nodes and setup coarse solver */
37   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* free */
40   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
41   PetscFunctionReturn(0);
42 }
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "PCBDDCResetCustomization"
46 PetscErrorCode PCBDDCResetCustomization(PC pc)
47 {
48   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
49   PetscInt       i;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
54   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
55   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
58   for (i=0;i<pcbddc->n_ISForDofs;i++) {
59     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
60   }
61   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
62   pcbddc->n_ISForDofs = 0;
63   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
64   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
65   PetscFunctionReturn(0);
66 }
67 
68 #undef __FUNCT__
69 #define __FUNCT__ "PCBDDCResetTopography"
70 PetscErrorCode PCBDDCResetTopography(PC pc)
71 {
72   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
73   PetscErrorCode ierr;
74 
75   PetscFunctionBegin;
76   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
77   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
78   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
79   PetscFunctionReturn(0);
80 }
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "PCBDDCResetSolvers"
84 PetscErrorCode PCBDDCResetSolvers(PC pc)
85 {
86   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
87   PetscErrorCode ierr;
88 
89   PetscFunctionBegin;
90   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
91   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
95   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
97   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
99   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
101   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
102   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
107   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
108   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
109   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
110   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
111   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
112   PetscFunctionReturn(0);
113 }
114 
115 #undef __FUNCT__
116 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
117 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
118 {
119   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
120   PC_IS          *pcis = (PC_IS*)pc->data;
121   VecType        impVecType;
122   PetscInt       n_constraints,n_R,old_size;
123   PetscErrorCode ierr;
124 
125   PetscFunctionBegin;
126   if (!pcbddc->ConstraintMatrix) {
127     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
128   }
129   /* get sizes */
130   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
131   n_R = pcis->n-pcbddc->n_actual_vertices;
132   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
133   /* local work vectors (try to avoid unneeded work)*/
134   /* R nodes */
135   old_size = -1;
136   if (pcbddc->vec1_R) {
137     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
138   }
139   if (n_R != old_size) {
140     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
141     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
142     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
143     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
144     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
145     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
146   }
147   /* local primal dofs */
148   old_size = -1;
149   if (pcbddc->vec1_P) {
150     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
151   }
152   if (pcbddc->local_primal_size != old_size) {
153     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
154     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
155     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
156     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
157   }
158   /* local explicit constraints */
159   old_size = -1;
160   if (pcbddc->vec1_C) {
161     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
162   }
163   if (n_constraints && n_constraints != old_size) {
164     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
165     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
166     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
167     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "PCBDDCSetUpCorrection"
174 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
175 {
176   PetscErrorCode         ierr;
177   /* pointers to pcis and pcbddc */
178   PC_IS*                 pcis = (PC_IS*)pc->data;
179   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
180   /* submatrices of local problem */
181   Mat                    A_RV,A_VR,A_VV;
182   /* working matrices */
183   Mat                    M1,M2,M3,C_CR;
184   /* working vectors */
185   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
186   /* additional working stuff */
187   IS                     is_aux;
188   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
189   const PetscScalar      *array,*row_cmat_values;
190   const PetscInt         *row_cmat_indices,*idx_R_local;
191   PetscInt               *idx_V_B,*auxindices;
192   PetscInt               n_vertices,n_constraints,size_of_constraint;
193   PetscInt               i,j,n_R,n_D,n_B;
194   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
195   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
196   MatType                impMatType;
197   /* some shortcuts to scalars */
198   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
199   /* for debugging purposes */
200   PetscReal              *coarsefunctions_errors,*constraints_errors;
201 
202   PetscFunctionBegin;
203   /* get number of vertices (corners plus constraints with change of basis)
204      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
205   n_vertices = pcbddc->n_actual_vertices;
206   n_constraints = pcbddc->local_primal_size-n_vertices;
207   /* Set Non-overlapping dimensions */
208   n_B = pcis->n_B; n_D = pcis->n - n_B;
209   n_R = pcis->n-n_vertices;
210 
211   /* Set types for local objects needed by BDDC precondtioner */
212   impMatType = MATSEQDENSE;
213 
214   /* Allocating some extra storage just to be safe */
215   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
216   for (i=0;i<pcis->n;i++) auxindices[i]=i;
217 
218   /* vertices in boundary numbering */
219   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
220   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
221   if (i != n_vertices) {
222     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
223   }
224 
225   /* Precompute stuffs needed for preprocessing and application of BDDC*/
226   if (n_constraints) {
227     /* see if we can save some allocations */
228     if (pcbddc->local_auxmat2) {
229       PetscInt on_R,on_constraints;
230       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
231       if (on_R != n_R || on_constraints != n_constraints) {
232         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
233         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
234       }
235     }
236     /* work vectors */
237     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
238     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
239     /* auxiliary matrices */
240     if (!pcbddc->local_auxmat2) {
241       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
242       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
243       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
244       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
245     }
246 
247     /* Extract constraints on R nodes: C_{CR}  */
248     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
249     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
250     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
251 
252     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
253     for (i=0;i<n_constraints;i++) {
254       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
255       /* Get row of constraint matrix in R numbering */
256       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
257       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
258       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
259       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
260       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
261       /* Solve for row of constraint matrix in R numbering */
262       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
263       /* Set values in local_auxmat2 */
264       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
265       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
266       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
267     }
268     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
269     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
270     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
271 
272     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
273     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
274     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
275     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
276     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
277     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
278     ierr = MatSetUp(M1);CHKERRQ(ierr);
279     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
280     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
281     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
282     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
283     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
284     ierr = MatDestroy(&M2);CHKERRQ(ierr);
285     ierr = MatDestroy(&M3);CHKERRQ(ierr);
286     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
287     if (!pcbddc->local_auxmat1) {
288       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
289     } else {
290       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
291     }
292   }
293 
294   /* Get submatrices from subdomain matrix */
295   if (n_vertices) {
296     PetscInt ibs,mbs;
297     PetscBool issbaij;
298     Mat newmat;
299 
300     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
301     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
302     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
303     if (ibs != mbs) { /* need to convert to SEQAIJ */
304       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
305       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
306       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
307       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
308       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
309     } else {
310       /* this is safe */
311       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
312       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
313       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
314         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
315         /* which of the two approaches is faster? */
316         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
317         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
318         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
319         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
320         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
321       } else {
322         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
323         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
324       }
325     }
326     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
327     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
328     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
329   }
330 
331   /* Matrix of coarse basis functions (local) */
332   if (pcbddc->coarse_phi_B) {
333     PetscInt on_B,on_primal;
334     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
335     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
336       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
337       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
338     }
339   }
340   if (pcbddc->coarse_phi_D) {
341     PetscInt on_D,on_primal;
342     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
343     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
344       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
345       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
346     }
347   }
348   if (!pcbddc->coarse_phi_B) {
349     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
350     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
351     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
352     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
353   }
354   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
355     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
356     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
357     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
358     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
359   }
360 
361   if (pcbddc->dbg_flag) {
362     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
363     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr);
364     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr);
365   }
366   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
367   ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr);
368 
369   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
370 
371   /* vertices */
372   for (i=0;i<n_vertices;i++) {
373     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
374     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
375     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
376     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
377     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
378     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
379     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
380     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
381     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
382     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
383     if (n_constraints) {
384       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
385       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
386       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
387     }
388     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
389     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
390 
391     /* Set values in coarse basis function and subdomain part of coarse_mat */
392     /* coarse basis functions */
393     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
394     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
395     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
396     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
397     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
398     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
399     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
400     if (pcbddc->switch_static || pcbddc->dbg_flag) {
401       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
402       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
403       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
404       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
405       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
406     }
407     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
408     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
409     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
410     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
411     if (n_constraints) {
412       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
413       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
414       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
415     }
416 
417     /* check */
418     if (pcbddc->dbg_flag) {
419       /* assemble subdomain vector on local nodes */
420       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
421       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
422       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
423       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
424       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
425       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
426       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
427       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
428       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
429       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
430       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
431       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
432       if (n_constraints) {
433         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
434         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
435         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
436       }
437       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
438       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
439       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
440       /* check saddle point solution */
441       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
442       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
443       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
444       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
445       /* shift by the identity matrix */
446       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
447       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
448       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
449       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
450     }
451   }
452 
453   /* constraints */
454   for (i=0;i<n_constraints;i++) {
455     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
456     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
457     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
458     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
459     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
460     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
461     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
462     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
463     if (n_vertices) {
464       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
465     }
466     /* Set values in coarse basis function and subdomain part of coarse_mat */
467     /* coarse basis functions */
468     j = i+n_vertices; /* don't touch this! */
469     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
470     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
471     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
472     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
473     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
474     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
475     if (pcbddc->switch_static || pcbddc->dbg_flag) {
476       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
479       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
480       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
481     }
482     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
483     if (n_vertices) {
484       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
485       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
486       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
487     }
488     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
489     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
490     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
491 
492     if (pcbddc->dbg_flag) {
493       /* assemble subdomain vector on nodes */
494       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
495       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
496       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
497       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
498       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
499       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
500       /* assemble subdomain vector of lagrange multipliers */
501       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
502       if (n_vertices) {
503         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
504         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
505         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
506       }
507       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
508       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
509       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
510       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
511       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
512       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
513       /* check saddle point solution */
514       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
515       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
516       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
517       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
518       /* shift by the identity matrix */
519       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
520       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
521       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
522       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
523     }
524   }
525   /* call assembling routines for local coarse basis */
526   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
527   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
528   if (pcbddc->switch_static || pcbddc->dbg_flag) {
529     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
530     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
531   }
532 
533   /* compute other basis functions for non-symmetric problems */
534   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
535   if (!setsym || (setsym && !issym)) {
536     if (!pcbddc->coarse_psi_B) {
537       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
538       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
539       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
540       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
541     }
542     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
543       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
544       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
545       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
546       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
547     }
548     for (i=0;i<pcbddc->local_primal_size;i++) {
549       if (n_constraints) {
550         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
551         for (j=0;j<n_constraints;j++) {
552           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
553         }
554         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
555         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
556       }
557       if (i<n_vertices) {
558         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
559         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
560         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
561         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
562         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
563         if (n_constraints) {
564           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
565         }
566       } else {
567         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
568       }
569       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
570       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
571       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
572       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
573       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
574       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
575       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
576       if (i<n_vertices) {
577         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
578       }
579       if (pcbddc->switch_static || pcbddc->dbg_flag) {
580         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
581         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
582         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
583         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
584         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
585       }
586 
587       if (pcbddc->dbg_flag) {
588         /* assemble subdomain vector on nodes */
589         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
590         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
591         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
592         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
593         if (i<n_vertices) {
594           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
595         }
596         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
597         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
598         /* assemble subdomain vector of lagrange multipliers */
599         for (j=0;j<pcbddc->local_primal_size;j++) {
600           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
601         }
602         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
603         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
604         /* check saddle point solution */
605         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
606         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
607         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
608         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
609         /* shift by the identity matrix */
610         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
611         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
612         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
613         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
614       }
615     }
616     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
617     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
618     if (pcbddc->switch_static || pcbddc->dbg_flag) {
619       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
620       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
621     }
622   }
623   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
624   /* Checking coarse_sub_mat and coarse basis functios */
625   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
626   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
627   if (pcbddc->dbg_flag) {
628     Mat         coarse_sub_mat;
629     Mat         AUXMAT,TM1,TM2,TM3,TM4;
630     Mat         coarse_phi_D,coarse_phi_B;
631     Mat         coarse_psi_D,coarse_psi_B;
632     Mat         A_II,A_BB,A_IB,A_BI;
633     MatType     checkmattype=MATSEQAIJ;
634     PetscReal   real_value;
635 
636     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
637     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
638     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
639     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
640     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
641     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
642     if (pcbddc->coarse_psi_B) {
643       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
644       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
645     }
646     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
647 
648     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
649     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
650     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
651     if (pcbddc->coarse_psi_B) {
652       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
653       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
654       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
655       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
656       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
657       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
658       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
659       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
660       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
661       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
662       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
663       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
664     } else {
665       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
666       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
667       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
668       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
669       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
670       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
671       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
672       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
673     }
674     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
675     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
676     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
677     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
678     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
679     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
680     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
681     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
682     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
683     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
684     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
685     for (i=0;i<pcbddc->local_primal_size;i++) {
686       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
687     }
688     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
689     for (i=0;i<pcbddc->local_primal_size;i++) {
690       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
691     }
692     if (pcbddc->coarse_psi_B) {
693       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
694       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
695         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
696       }
697       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
698       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
699         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
700       }
701     }
702     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
703     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
704     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
705     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
706     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
707     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
708     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
709     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
710     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
711     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
712     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
713     if (pcbddc->coarse_psi_B) {
714       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
715       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
716     }
717     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
718     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
719     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
720     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
721   }
722   /* free memory */
723   if (n_vertices) {
724     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
725     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
726     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
727     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
728     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
729   }
730   if (n_constraints) {
731     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
732     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
733     ierr = MatDestroy(&M1);CHKERRQ(ierr);
734     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
735   }
736   ierr = PetscFree(auxindices);CHKERRQ(ierr);
737   /* get back data */
738   *coarse_submat_vals_n = coarse_submat_vals;
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
744 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
745 {
746   PC_IS*            pcis = (PC_IS*)(pc->data);
747   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
748   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
749   PetscBool         issbaij,isbaij;
750   /* manage repeated solves */
751   MatReuse          reuse;
752   PetscErrorCode    ierr;
753 
754   PetscFunctionBegin;
755   if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) {
756     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
757   }
758   /* get mat flags */
759   reuse = MAT_INITIAL_MATRIX;
760   if (pc->setupcalled) {
761     if (pc->flag == SAME_NONZERO_PATTERN) {
762       reuse = MAT_REUSE_MATRIX;
763     } else {
764       reuse = MAT_INITIAL_MATRIX;
765     }
766   }
767   if (reuse == MAT_INITIAL_MATRIX) {
768     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
769     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
770     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
771     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
772     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
773   }
774 
775   /* transform local matrices if needed */
776   if (pcbddc->use_change_of_basis) {
777     Mat         change_mat_all;
778     PetscScalar *row_cmat_values;
779     PetscInt    *row_cmat_indices;
780     PetscInt    *nnz,*is_indices,*temp_indices;
781     PetscInt    i,j,k,n_D,n_B;
782 
783     /* Get Non-overlapping dimensions */
784     n_B = pcis->n_B;
785     n_D = pcis->n-n_B;
786 
787     /* compute nonzero structure of change of basis on all local nodes */
788     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
789     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
790     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
791     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
792     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
793     k=1;
794     for (i=0;i<n_B;i++) {
795       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
796       nnz[is_indices[i]]=j;
797       if (k < j) k = j;
798       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
799     }
800     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
801     /* assemble change of basis matrix on the whole set of local dofs */
802     ierr = PetscMalloc1(k,&temp_indices);CHKERRQ(ierr);
803     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
804     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
805     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
806     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
807     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
808     for (i=0;i<n_D;i++) {
809       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
810     }
811     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
812     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
813     for (i=0;i<n_B;i++) {
814       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
815       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
816       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
817       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
818     }
819     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
820     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
821     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
822     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
823     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
824     if (!issbaij && !isbaij) {
825       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
826     } else {
827       Mat work_mat;
828       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
829       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
830       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
831     }
832     /*
833     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
834     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
835     */
836     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
837     ierr = PetscFree(nnz);CHKERRQ(ierr);
838     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
839   } else {
840     /* without change of basis, the local matrix is unchanged */
841     if (!pcbddc->local_mat) {
842       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
843       pcbddc->local_mat = matis->A;
844     }
845   }
846 
847   /* get submatrices */
848   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
849   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
850   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
851   if (!issbaij) {
852     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
853     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
854   } else {
855     Mat newmat;
856     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
857     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
858     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
859     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
860   }
861   PetscFunctionReturn(0);
862 }
863 
864 #undef __FUNCT__
865 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
866 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
867 {
868   PC_IS*         pcis = (PC_IS*)(pc->data);
869   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
870   IS             is_aux1,is_aux2;
871   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
872   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
873   PetscInt       vbs,bs;
874   PetscBT        bitmask;
875   PetscErrorCode ierr;
876 
877   PetscFunctionBegin;
878   /* No need to setup local scatters if primal space is unchanged */
879   if (!pcbddc->new_primal_space_local) {
880     PetscFunctionReturn(0);
881   }
882   /* destroy old objects */
883   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
884   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
885   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
886   /* Set Non-overlapping dimensions */
887   n_B = pcis->n_B; n_D = pcis->n - n_B;
888   n_vertices = pcbddc->n_actual_vertices;
889   /* create auxiliary bitmask */
890   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
891   for (i=0;i<n_vertices;i++) {
892     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
893   }
894 
895   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
896   ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr);
897   for (i=0, n_R=0; i<pcis->n; i++) {
898     if (!PetscBTLookup(bitmask,i)) {
899       idx_R_local[n_R] = i;
900       n_R++;
901     }
902   }
903 
904   /* Block code */
905   vbs = 1;
906   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
907   if (bs>1 && !(n_vertices%bs)) {
908     PetscBool is_blocked = PETSC_TRUE;
909     PetscInt  *vary;
910     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
911     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
912     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
913     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
914     for (i=0; i<n_vertices; i++) {
915       if (vary[i]!=0 && vary[i]!=bs) {
916         is_blocked = PETSC_FALSE;
917         break;
918       }
919     }
920     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
921       vbs = bs;
922       for (i=0;i<n_R/vbs;i++) {
923         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
924       }
925     }
926     ierr = PetscFree(vary);CHKERRQ(ierr);
927   }
928   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
929   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
930 
931   /* print some info if requested */
932   if (pcbddc->dbg_flag) {
933     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
934     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
935     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
936     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
937     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
938     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
939     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
940     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
941   }
942 
943   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
944   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
945   ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr);
946   ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr);
947   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
948   for (i=0; i<n_D; i++) {
949     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
950   }
951   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
952   for (i=0, j=0; i<n_R; i++) {
953     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
954       aux_array1[j++] = i;
955     }
956   }
957   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
958   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
959   for (i=0, j=0; i<n_B; i++) {
960     if (!PetscBTLookup(bitmask,is_indices[i])) {
961       aux_array2[j++] = i;
962     }
963   }
964   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
965   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
966   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
967   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
968   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
969 
970   if (pcbddc->switch_static || pcbddc->dbg_flag) {
971     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
972     for (i=0, j=0; i<n_R; i++) {
973       if (PetscBTLookup(bitmask,idx_R_local[i])) {
974         aux_array1[j++] = i;
975       }
976     }
977     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
978     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
979     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
980   }
981   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
982   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
983   PetscFunctionReturn(0);
984 }
985 
986 
987 #undef __FUNCT__
988 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
989 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
990 {
991   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
992   PC_IS          *pcis = (PC_IS*)pc->data;
993   PC             pc_temp;
994   Mat            A_RR;
995   MatReuse       reuse;
996   PetscScalar    m_one = -1.0;
997   PetscReal      value;
998   PetscInt       n_D,n_R,ibs,mbs;
999   PetscBool      use_exact,use_exact_reduced,issbaij;
1000   PetscErrorCode ierr;
1001   /* prefixes stuff */
1002   char           dir_prefix[256],neu_prefix[256],str_level[3];
1003   size_t         len;
1004 
1005   PetscFunctionBegin;
1006 
1007   /* compute prefixes */
1008   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1009   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1010   if (!pcbddc->current_level) {
1011     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1012     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1013     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1014     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1015   } else {
1016     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1017     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
1018     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1019     len -= 15; /* remove "pc_bddc_coarse_" */
1020     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
1021     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1022     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1023     *(dir_prefix+len)='\0';
1024     *(neu_prefix+len)='\0';
1025     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1026     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1027     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1028     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1029   }
1030 
1031   /* DIRICHLET PROBLEM */
1032   /* Matrix for Dirichlet problem is pcis->A_II */
1033   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1034   if (!pcbddc->ksp_D) { /* create object if not yet build */
1035     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1036     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1037     /* default */
1038     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1039     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1040     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1041     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1042     if (issbaij) {
1043       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1044     } else {
1045       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1046     }
1047     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1048   }
1049   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1050   /* Allow user's customization */
1051   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1052   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1053   if (!n_D) {
1054     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1055     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1056   }
1057   /* Set Up KSP for Dirichlet problem of BDDC */
1058   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1059   /* set ksp_D into pcis data */
1060   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1061   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1062   pcis->ksp_D = pcbddc->ksp_D;
1063 
1064   /* NEUMANN PROBLEM */
1065   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1066   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1067   if (pcbddc->ksp_R) { /* already created ksp */
1068     PetscInt nn_R;
1069     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1070     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1071     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1072     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1073       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1074       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1075       reuse = MAT_INITIAL_MATRIX;
1076     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1077       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1078         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1079         reuse = MAT_INITIAL_MATRIX;
1080       } else { /* safe to reuse the matrix */
1081         reuse = MAT_REUSE_MATRIX;
1082       }
1083     }
1084     /* last check */
1085     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1086       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1087       reuse = MAT_INITIAL_MATRIX;
1088     }
1089   } else { /* first time, so we need to create the matrix */
1090     reuse = MAT_INITIAL_MATRIX;
1091   }
1092   /* extract A_RR */
1093   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1094   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1095   if (ibs != mbs) {
1096     Mat newmat;
1097     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1098     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1099     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1100   } else {
1101     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1102   }
1103   if (!pcbddc->ksp_R) { /* create object if not present */
1104     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1105     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1106     /* default */
1107     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1108     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1109     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1110     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1111     if (issbaij) {
1112       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1113     } else {
1114       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1115     }
1116     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1117   }
1118   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1119   /* Allow user's customization */
1120   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1121   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1122   if (!n_R) {
1123     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1124     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1125   }
1126   /* Set Up KSP for Neumann problem of BDDC */
1127   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1128 
1129   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1130   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1131     /* Dirichlet */
1132     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1133     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1134     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1135     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1136     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1137     /* need to be adapted? */
1138     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1139     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1140     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1141     /* print info */
1142     if (pcbddc->dbg_flag) {
1143       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1144       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1145       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1146       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1147       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1148       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1149     }
1150     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1151       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1152     }
1153 
1154     /* Neumann */
1155     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1156     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1157     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1158     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1159     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1160     /* need to be adapted? */
1161     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1162     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1163     /* print info */
1164     if (pcbddc->dbg_flag) {
1165       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1166       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1167     }
1168     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1169       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1170     }
1171   }
1172   /* free Neumann problem's matrix */
1173   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1179 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1180 {
1181   PetscErrorCode ierr;
1182   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1183 
1184   PetscFunctionBegin;
1185   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1186   if (pcbddc->local_auxmat1) {
1187     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1188     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1189   }
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 #undef __FUNCT__
1194 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1195 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1196 {
1197   PetscErrorCode ierr;
1198   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1199   PC_IS*            pcis = (PC_IS*)  (pc->data);
1200   const PetscScalar zero = 0.0;
1201 
1202   PetscFunctionBegin;
1203   /* Application of PHI^T (or PSI^T)  */
1204   if (pcbddc->coarse_psi_B) {
1205     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1206     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1207   } else {
1208     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1209     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1210   }
1211   /* Scatter data of coarse_rhs */
1212   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1213   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1214 
1215   /* Local solution on R nodes */
1216   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1217   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1218   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1219   if (pcbddc->switch_static) {
1220     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1221     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1222   }
1223   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1224   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1225   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1226   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1227   if (pcbddc->switch_static) {
1228     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1230   }
1231 
1232   /* Coarse solution */
1233   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1234   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1235     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1236   }
1237   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1238   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1239 
1240   /* Sum contributions from two levels */
1241   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1242   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNCT__
1247 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1248 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1249 {
1250   PetscErrorCode ierr;
1251   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1252 
1253   PetscFunctionBegin;
1254   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1260 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1261 {
1262   PetscErrorCode ierr;
1263   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1264 
1265   PetscFunctionBegin;
1266   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 /* uncomment for testing purposes */
1271 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1272 #undef __FUNCT__
1273 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1274 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1275 {
1276   PetscErrorCode    ierr;
1277   PC_IS*            pcis = (PC_IS*)(pc->data);
1278   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1279   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1280   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1281   MatType           impMatType=MATSEQAIJ;
1282   /* one and zero */
1283   PetscScalar       one=1.0,zero=0.0;
1284   /* space to store constraints and their local indices */
1285   PetscScalar       *temp_quadrature_constraint;
1286   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1287   /* iterators */
1288   PetscInt          i,j,k,total_counts,temp_start_ptr;
1289   /* stuff to store connected components stored in pcbddc->mat_graph */
1290   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1291   PetscInt          n_ISForFaces,n_ISForEdges;
1292   /* near null space stuff */
1293   MatNullSpace      nearnullsp;
1294   const Vec         *nearnullvecs;
1295   Vec               *localnearnullsp;
1296   PetscBool         nnsp_has_cnst;
1297   PetscInt          nnsp_size;
1298   PetscScalar       *array;
1299   /* BLAS integers */
1300   PetscBLASInt      lwork,lierr;
1301   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1302   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1303   /* LAPACK working arrays for SVD or POD */
1304   PetscBool         skip_lapack;
1305   PetscScalar       *work;
1306   PetscReal         *singular_vals;
1307 #if defined(PETSC_USE_COMPLEX)
1308   PetscReal         *rwork;
1309 #endif
1310 #if defined(PETSC_MISSING_LAPACK_GESVD)
1311   PetscBLASInt      Blas_one_2=1;
1312   PetscScalar       *temp_basis,*correlation_mat;
1313 #else
1314   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1315   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1316 #endif
1317   /* reuse */
1318   PetscInt          olocal_primal_size;
1319   PetscInt          *oprimal_indices_local_idxs;
1320   /* change of basis */
1321   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1322   PetscBool         boolforchange,qr_needed;
1323   PetscBT           touched,change_basis,qr_needed_idx;
1324   /* auxiliary stuff */
1325   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1326   /* some quantities */
1327   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1328   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1329 
1330 
1331   PetscFunctionBegin;
1332   /* Destroy Mat objects computed previously */
1333   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1334   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1335   /* Get index sets for faces, edges and vertices from graph */
1336   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1337     pcbddc->use_vertices = PETSC_TRUE;
1338   }
1339   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1340   /* HACK: provide functions to set change of basis */
1341   if (!ISForVertices && pcbddc->NullSpace) {
1342     pcbddc->use_change_of_basis = PETSC_TRUE;
1343     pcbddc->use_change_on_faces = PETSC_FALSE;
1344   }
1345   /* print some info */
1346   if (pcbddc->dbg_flag) {
1347     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1348     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1349     i = 0;
1350     if (ISForVertices) {
1351       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1352     }
1353     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1354     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1355     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1356     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1357   }
1358   /* check if near null space is attached to global mat */
1359   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1360   if (nearnullsp) {
1361     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1362     /* remove any stored info */
1363     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1364     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1365     /* store information for BDDC solver reuse */
1366     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1367     pcbddc->onearnullspace = nearnullsp;
1368     ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1369     for (i=0;i<nnsp_size;i++) {
1370       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1371     }
1372   } else { /* if near null space is not provided BDDC uses constants by default */
1373     nnsp_size = 0;
1374     nnsp_has_cnst = PETSC_TRUE;
1375   }
1376   /* get max number of constraints on a single cc */
1377   max_constraints = nnsp_size;
1378   if (nnsp_has_cnst) max_constraints++;
1379 
1380   /*
1381        Evaluate maximum storage size needed by the procedure
1382        - temp_indices will contain start index of each constraint stored as follows
1383        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1384        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1385        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1386                                                                                                                                                          */
1387   total_counts = n_ISForFaces+n_ISForEdges;
1388   total_counts *= max_constraints;
1389   n_vertices = 0;
1390   if (ISForVertices) {
1391     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1392   }
1393   total_counts += n_vertices;
1394   ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr);
1395   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1396   total_counts = 0;
1397   max_size_of_constraint = 0;
1398   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1399     if (i<n_ISForEdges) {
1400       used_IS = &ISForEdges[i];
1401     } else {
1402       used_IS = &ISForFaces[i-n_ISForEdges];
1403     }
1404     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1405     total_counts += j;
1406     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1407   }
1408   total_counts *= max_constraints;
1409   total_counts += n_vertices;
1410   ierr = PetscMalloc1(total_counts,&temp_quadrature_constraint);CHKERRQ(ierr);
1411   ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint);CHKERRQ(ierr);
1412   ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1413   /* get local part of global near null space vectors */
1414   ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1415   for (k=0;k<nnsp_size;k++) {
1416     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1417     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1418     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1419   }
1420 
1421   /* whether or not to skip lapack calls */
1422   skip_lapack = PETSC_TRUE;
1423   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1424 
1425   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1426   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1427     PetscScalar temp_work;
1428 #if defined(PETSC_MISSING_LAPACK_GESVD)
1429     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1430     ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1431     ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1432     ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1433 #if defined(PETSC_USE_COMPLEX)
1434     ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1435 #endif
1436     /* now we evaluate the optimal workspace using query with lwork=-1 */
1437     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1438     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1439     lwork = -1;
1440     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1441 #if !defined(PETSC_USE_COMPLEX)
1442     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1443 #else
1444     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1445 #endif
1446     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1447     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1448 #else /* on missing GESVD */
1449     /* SVD */
1450     PetscInt max_n,min_n;
1451     max_n = max_size_of_constraint;
1452     min_n = max_constraints;
1453     if (max_size_of_constraint < max_constraints) {
1454       min_n = max_size_of_constraint;
1455       max_n = max_constraints;
1456     }
1457     ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1458 #if defined(PETSC_USE_COMPLEX)
1459     ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1460 #endif
1461     /* now we evaluate the optimal workspace using query with lwork=-1 */
1462     lwork = -1;
1463     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1464     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1465     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1466     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1467 #if !defined(PETSC_USE_COMPLEX)
1468     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1469 #else
1470     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1471 #endif
1472     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1473     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1474 #endif /* on missing GESVD */
1475     /* Allocate optimal workspace */
1476     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1477     ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr);
1478   }
1479   /* Now we can loop on constraining sets */
1480   total_counts = 0;
1481   temp_indices[0] = 0;
1482   /* vertices */
1483   if (ISForVertices) {
1484     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1485     if (nnsp_has_cnst) { /* consider all vertices */
1486       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1487       for (i=0;i<n_vertices;i++) {
1488         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1489         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1490         total_counts++;
1491       }
1492     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1493       PetscBool used_vertex;
1494       for (i=0;i<n_vertices;i++) {
1495         used_vertex = PETSC_FALSE;
1496         k = 0;
1497         while (!used_vertex && k<nnsp_size) {
1498           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1499           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1500             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1501             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1502             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1503             total_counts++;
1504             used_vertex = PETSC_TRUE;
1505           }
1506           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1507           k++;
1508         }
1509       }
1510     }
1511     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1512     n_vertices = total_counts;
1513   }
1514 
1515   /* edges and faces */
1516   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1517     if (i<n_ISForEdges) {
1518       used_IS = &ISForEdges[i];
1519       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1520     } else {
1521       used_IS = &ISForFaces[i-n_ISForEdges];
1522       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1523     }
1524     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1525     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1526     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1527     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1528     /* change of basis should not be performed on local periodic nodes */
1529     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1530     if (nnsp_has_cnst) {
1531       PetscScalar quad_value;
1532       temp_constraints++;
1533       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1534       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1535       for (j=0;j<size_of_constraint;j++) {
1536         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1537       }
1538       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1539       total_counts++;
1540     }
1541     for (k=0;k<nnsp_size;k++) {
1542       PetscReal real_value;
1543       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1544       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1545       for (j=0;j<size_of_constraint;j++) {
1546         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1547       }
1548       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1549       /* check if array is null on the connected component */
1550       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1551       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1552       if (real_value > 0.0) { /* keep indices and values */
1553         temp_constraints++;
1554         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1555         total_counts++;
1556       }
1557     }
1558     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1559     valid_constraints = temp_constraints;
1560     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1561     if (!pcbddc->use_nnsp_true && temp_constraints) {
1562       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1563 
1564 #if defined(PETSC_MISSING_LAPACK_GESVD)
1565       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1566          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1567          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1568             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1569             from that computed using LAPACKgesvd
1570          -> This is due to a different computation of eigenvectors in LAPACKheev
1571          -> The quality of the POD-computed basis will be the same */
1572       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1573       /* Store upper triangular part of correlation matrix */
1574       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1575       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1576       for (j=0;j<temp_constraints;j++) {
1577         for (k=0;k<j+1;k++) {
1578           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1579         }
1580       }
1581       /* compute eigenvalues and eigenvectors of correlation matrix */
1582       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1583       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1584 #if !defined(PETSC_USE_COMPLEX)
1585       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1586 #else
1587       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1588 #endif
1589       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1590       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1591       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1592       j=0;
1593       while (j < temp_constraints && singular_vals[j] < tol) j++;
1594       total_counts=total_counts-j;
1595       valid_constraints = temp_constraints-j;
1596       /* scale and copy POD basis into used quadrature memory */
1597       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1598       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1599       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1600       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1601       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1602       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1603       if (j<temp_constraints) {
1604         PetscInt ii;
1605         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1606         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1607         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1608         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1609         for (k=0;k<temp_constraints-j;k++) {
1610           for (ii=0;ii<size_of_constraint;ii++) {
1611             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1612           }
1613         }
1614       }
1615 #else  /* on missing GESVD */
1616       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1617       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1618       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1619       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1620 #if !defined(PETSC_USE_COMPLEX)
1621       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1622 #else
1623       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1624 #endif
1625       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1626       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1627       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1628       k = temp_constraints;
1629       if (k > size_of_constraint) k = size_of_constraint;
1630       j = 0;
1631       while (j < k && singular_vals[k-j-1] < tol) j++;
1632       total_counts = total_counts-temp_constraints+k-j;
1633       valid_constraints = k-j;
1634 #endif /* on missing GESVD */
1635     }
1636     /* setting change_of_basis flag is safe now */
1637     if (boolforchange) {
1638       for (j=0;j<valid_constraints;j++) {
1639         PetscBTSet(change_basis,total_counts-j-1);
1640       }
1641     }
1642   }
1643   /* free index sets of faces, edges and vertices */
1644   for (i=0;i<n_ISForFaces;i++) {
1645     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1646   }
1647   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1648   for (i=0;i<n_ISForEdges;i++) {
1649     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1650   }
1651   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1652   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1653   /* map temp_indices_to_constraint in boundary numbering */
1654   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1655   if (i != temp_indices[total_counts]) {
1656     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1657   }
1658 
1659   /* free workspace */
1660   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1661     ierr = PetscFree(work);CHKERRQ(ierr);
1662 #if defined(PETSC_USE_COMPLEX)
1663     ierr = PetscFree(rwork);CHKERRQ(ierr);
1664 #endif
1665     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1666 #if defined(PETSC_MISSING_LAPACK_GESVD)
1667     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1668     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1669 #endif
1670   }
1671   for (k=0;k<nnsp_size;k++) {
1672     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1673   }
1674   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1675 
1676   /* set quantities in pcbddc data structure and store previous primal size */
1677   /* n_vertices defines the number of subdomain corners in the primal space */
1678   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1679   olocal_primal_size = pcbddc->local_primal_size;
1680   pcbddc->local_primal_size = total_counts;
1681   pcbddc->n_vertices = n_vertices;
1682   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1683 
1684   /* Create constraint matrix */
1685   /* The constraint matrix is used to compute the l2g map of primal dofs */
1686   /* so we need to set it up properly either with or without change of basis */
1687   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1688   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1689   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1690   /* array to compute a local numbering of constraints : vertices first then constraints */
1691   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
1692   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1693   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1694   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
1695   /* auxiliary stuff for basis change */
1696   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
1697   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1698 
1699   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1700   total_primal_vertices=0;
1701   for (i=0;i<pcbddc->local_primal_size;i++) {
1702     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1703     if (size_of_constraint == 1) {
1704       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1705       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1706       aux_primal_minloc[total_primal_vertices]=0;
1707       total_primal_vertices++;
1708     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1709       PetscInt min_loc,min_index;
1710       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1711       /* find first untouched local node */
1712       k = 0;
1713       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1714       min_index = global_indices[k];
1715       min_loc = k;
1716       /* search the minimum among global nodes already untouched on the cc */
1717       for (k=1;k<size_of_constraint;k++) {
1718         /* there can be more than one constraint on a single connected component */
1719         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1720           min_index = global_indices[k];
1721           min_loc = k;
1722         }
1723       }
1724       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1725       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1726       aux_primal_minloc[total_primal_vertices]=min_loc;
1727       total_primal_vertices++;
1728     }
1729   }
1730   /* determine if a QR strategy is needed for change of basis */
1731   qr_needed = PETSC_FALSE;
1732   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1733   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1734     if (PetscBTLookup(change_basis,i)) {
1735       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1736       j = 0;
1737       for (k=0;k<size_of_constraint;k++) {
1738         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1739           j++;
1740         }
1741       }
1742       /* found more than one primal dof on the cc */
1743       if (j > 1) {
1744         PetscBTSet(qr_needed_idx,i);
1745         qr_needed = PETSC_TRUE;
1746       }
1747     }
1748   }
1749   /* free workspace */
1750   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1751 
1752   /* permute indices in order to have a sorted set of vertices */
1753   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1754 
1755   /* nonzero structure of constraint matrix */
1756   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
1757   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1758   j=total_primal_vertices;
1759   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1760     if (!PetscBTLookup(change_basis,i)) {
1761       nnz[j]=temp_indices[i+1]-temp_indices[i];
1762       j++;
1763     }
1764   }
1765   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1766   ierr = PetscFree(nnz);CHKERRQ(ierr);
1767   /* set values in constraint matrix */
1768   for (i=0;i<total_primal_vertices;i++) {
1769     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1770   }
1771   total_counts = total_primal_vertices;
1772   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1773     if (!PetscBTLookup(change_basis,i)) {
1774       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1775       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1776       total_counts++;
1777     }
1778   }
1779   /* assembling */
1780   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1781   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1782   /*
1783   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1784   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1785   */
1786   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1787   if (pcbddc->use_change_of_basis) {
1788     /* dual and primal dofs on a single cc */
1789     PetscInt     dual_dofs,primal_dofs;
1790     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1791     PetscInt     primal_counter;
1792     /* working stuff for GEQRF */
1793     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
1794     PetscBLASInt lqr_work;
1795     /* working stuff for UNGQR */
1796     PetscScalar  *gqr_work,lgqr_work_t;
1797     PetscBLASInt lgqr_work;
1798     /* working stuff for TRTRS */
1799     PetscScalar  *trs_rhs;
1800     PetscBLASInt Blas_NRHS;
1801     /* pointers for values insertion into change of basis matrix */
1802     PetscInt     *start_rows,*start_cols;
1803     PetscScalar  *start_vals;
1804     /* working stuff for values insertion */
1805     PetscBT      is_primal;
1806 
1807     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1808     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1809     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1810     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1811     /* work arrays */
1812     ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr);
1813     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1814     /* nonzeros per row */
1815     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1816       if (PetscBTLookup(change_basis,i)) {
1817         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1818         if (PetscBTLookup(qr_needed_idx,i)) {
1819           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1820         } else {
1821           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1822           /* get local primal index on the cc */
1823           j = 0;
1824           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1825           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1826         }
1827       }
1828     }
1829     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1830     ierr = PetscFree(nnz);CHKERRQ(ierr);
1831     /* Set initial identity in the matrix */
1832     for (i=0;i<pcis->n_B;i++) {
1833       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1834     }
1835 
1836     if (pcbddc->dbg_flag) {
1837       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1838       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1839     }
1840 
1841 
1842     /* Now we loop on the constraints which need a change of basis */
1843     /*
1844        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1845        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1846 
1847        Basic blocks of change of basis matrix T computed
1848 
1849           - Using the following block transformation if there is only a primal dof on the cc
1850             (in the example, primal dof is the last one of the edge in LOCAL ordering
1851              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1852             | 1        0   ...        0     1 |
1853             | 0        1   ...        0     1 |
1854             |              ...                |
1855             | 0        ...            1     1 |
1856             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1857 
1858           - via QR decomposition of constraints otherwise
1859     */
1860     if (qr_needed) {
1861       /* space to store Q */
1862       ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr);
1863       /* first we issue queries for optimal work */
1864       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1865       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1866       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1867       lqr_work = -1;
1868       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1869       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1870       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1871       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
1872       lgqr_work = -1;
1873       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1874       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1875       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1876       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1877       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1878       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1879       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1880       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1881       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
1882       /* array to store scaling factors for reflectors */
1883       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
1884       /* array to store rhs and solution of triangular solver */
1885       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
1886       /* allocating workspace for check */
1887       if (pcbddc->dbg_flag) {
1888         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
1889       }
1890     }
1891     /* array to store whether a node is primal or not */
1892     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1893     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
1894     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
1895     if (i != total_primal_vertices) {
1896       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
1897     }
1898     for (i=0;i<total_primal_vertices;i++) {
1899       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
1900     }
1901     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
1902 
1903     /* loop on constraints and see whether or not they need a change of basis and compute it */
1904     /* -> using implicit ordering contained in temp_indices data */
1905     total_counts = pcbddc->n_vertices;
1906     primal_counter = total_counts;
1907     while (total_counts<pcbddc->local_primal_size) {
1908       primal_dofs = 1;
1909       if (PetscBTLookup(change_basis,total_counts)) {
1910         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1911         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1912           primal_dofs++;
1913         }
1914         /* get constraint info */
1915         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1916         dual_dofs = size_of_constraint-primal_dofs;
1917 
1918         if (pcbddc->dbg_flag) {
1919           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1920         }
1921 
1922         if (primal_dofs > 1) { /* QR */
1923 
1924           /* copy quadrature constraints for change of basis check */
1925           if (pcbddc->dbg_flag) {
1926             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1927           }
1928           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1929           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1930 
1931           /* compute QR decomposition of constraints */
1932           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1933           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1934           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1935           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1936           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1937           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1938           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1939 
1940           /* explictly compute R^-T */
1941           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1942           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1943           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1944           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1945           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1946           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1947           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1948           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1949           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1950           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1951 
1952           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1953           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1954           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1955           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1956           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1957           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1958           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1959           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1960           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1961 
1962           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1963              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1964              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1965           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1966           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1967           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1968           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1969           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1970           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1971           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1972           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1973           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1974           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1975 
1976           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1977           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1978           /* insert cols for primal dofs */
1979           for (j=0;j<primal_dofs;j++) {
1980             start_vals = &qr_basis[j*size_of_constraint];
1981             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1982             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1983           }
1984           /* insert cols for dual dofs */
1985           for (j=0,k=0;j<dual_dofs;k++) {
1986             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1987               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1988               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1989               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1990               j++;
1991             }
1992           }
1993 
1994           /* check change of basis */
1995           if (pcbddc->dbg_flag) {
1996             PetscInt   ii,jj;
1997             PetscBool valid_qr=PETSC_TRUE;
1998             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1999             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2000             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2001             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2002             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2003             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2004             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2005             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2006             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2007             for (jj=0;jj<size_of_constraint;jj++) {
2008               for (ii=0;ii<primal_dofs;ii++) {
2009                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2010                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2011               }
2012             }
2013             if (!valid_qr) {
2014               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2015               for (jj=0;jj<size_of_constraint;jj++) {
2016                 for (ii=0;ii<primal_dofs;ii++) {
2017                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2018                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2019                   }
2020                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2021                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2022                   }
2023                 }
2024               }
2025             } else {
2026               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2027             }
2028           }
2029         } else { /* simple transformation block */
2030           PetscInt row,col;
2031           PetscScalar val;
2032           for (j=0;j<size_of_constraint;j++) {
2033             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2034             if (!PetscBTLookup(is_primal,row)) {
2035               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2036               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2037               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
2038             } else {
2039               for (k=0;k<size_of_constraint;k++) {
2040                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2041                 if (row != col) {
2042                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2043                 } else {
2044                   val = 1.0;
2045                 }
2046                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2047               }
2048             }
2049           }
2050         }
2051         /* increment primal counter */
2052         primal_counter += primal_dofs;
2053       } else {
2054         if (pcbddc->dbg_flag) {
2055           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2056         }
2057       }
2058       /* increment constraint counter total_counts */
2059       total_counts += primal_dofs;
2060     }
2061 
2062     /* free workspace */
2063     if (qr_needed) {
2064       if (pcbddc->dbg_flag) {
2065         ierr = PetscFree(work);CHKERRQ(ierr);
2066       }
2067       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2068       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2069       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2070       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2071       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2072     }
2073     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2074     /* assembling */
2075     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2076     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2077     /*
2078     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2079     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2080     */
2081   }
2082 
2083   /* get indices in local ordering for vertices and constraints */
2084   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2085     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2086     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2087   }
2088   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2089   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2090   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2091   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2092   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2093   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2094   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2095   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2096   /* set quantities in PCBDDC data struct */
2097   pcbddc->n_actual_vertices = i;
2098   /* check if a new primal space has been introduced */
2099   pcbddc->new_primal_space_local = PETSC_TRUE;
2100   if (olocal_primal_size == pcbddc->local_primal_size) {
2101     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2102     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2103     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2104   }
2105   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2106   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2107 
2108   /* flush dbg viewer */
2109   if (pcbddc->dbg_flag) {
2110     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2111   }
2112 
2113   /* free workspace */
2114   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2115   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2116   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2117   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2118   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2119   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2120   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2121   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2122   PetscFunctionReturn(0);
2123 }
2124 
2125 #undef __FUNCT__
2126 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2127 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2128 {
2129   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2130   PC_IS       *pcis = (PC_IS*)pc->data;
2131   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2132   PetscInt    bs,ierr,i,vertex_size;
2133   PetscViewer viewer=pcbddc->dbg_viewer;
2134 
2135   PetscFunctionBegin;
2136   /* Reset previously computed graph */
2137   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2138   /* Init local Graph struct */
2139   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2140 
2141   /* Check validity of the csr graph passed in by the user */
2142   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2143     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2144   }
2145 
2146   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2147   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2148     Mat mat_adj;
2149     const PetscInt *xadj,*adjncy;
2150     PetscBool flg_row=PETSC_TRUE;
2151 
2152     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2153     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2154     if (!flg_row) {
2155       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2156     }
2157     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2158     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2159     if (!flg_row) {
2160       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2161     }
2162     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2163   }
2164 
2165   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2166   vertex_size = 1;
2167   if (!pcbddc->user_provided_isfordofs) {
2168     if (!pcbddc->n_ISForDofs) {
2169       IS *custom_ISForDofs;
2170 
2171       ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
2172       ierr = PetscMalloc1(bs,&custom_ISForDofs);CHKERRQ(ierr);
2173       for (i=0;i<bs;i++) {
2174         ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
2175       }
2176       ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
2177       pcbddc->user_provided_isfordofs = PETSC_FALSE;
2178       /* remove my references to IS objects */
2179       for (i=0;i<bs;i++) {
2180         ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
2181       }
2182       ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
2183     }
2184   } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2185     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2186   }
2187 
2188   /* Setup of Graph */
2189   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
2190 
2191   /* Graph's connected components analysis */
2192   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2193 
2194   /* print some info to stdout */
2195   if (pcbddc->dbg_flag) {
2196     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2197   }
2198 
2199   /* mark topography has done */
2200   pcbddc->recompute_topography = PETSC_FALSE;
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 #undef __FUNCT__
2205 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2206 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2207 {
2208   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2209   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2210   PetscErrorCode ierr;
2211 
2212   PetscFunctionBegin;
2213   n = 0;
2214   vertices = 0;
2215   if (pcbddc->ConstraintMatrix) {
2216     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2217     for (i=0;i<local_primal_size;i++) {
2218       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2219       if (size_of_constraint == 1) n++;
2220       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2221     }
2222     if (vertices_idx) {
2223       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2224       n = 0;
2225       for (i=0;i<local_primal_size;i++) {
2226         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2227         if (size_of_constraint == 1) {
2228           vertices[n++]=row_cmat_indices[0];
2229         }
2230         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2231       }
2232     }
2233   }
2234   *n_vertices = n;
2235   if (vertices_idx) *vertices_idx = vertices;
2236   PetscFunctionReturn(0);
2237 }
2238 
2239 #undef __FUNCT__
2240 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2241 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2242 {
2243   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2244   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2245   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2246   PetscBT        touched;
2247   PetscErrorCode ierr;
2248 
2249     /* This function assumes that the number of local constraints per connected component
2250        is not greater than the number of nodes defined for the connected component
2251        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2252   PetscFunctionBegin;
2253   n = 0;
2254   constraints_index = 0;
2255   if (pcbddc->ConstraintMatrix) {
2256     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2257     max_size_of_constraint = 0;
2258     for (i=0;i<local_primal_size;i++) {
2259       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2260       if (size_of_constraint > 1) {
2261         n++;
2262       }
2263       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2264       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2265     }
2266     if (constraints_idx) {
2267       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
2268       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
2269       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2270       n = 0;
2271       for (i=0;i<local_primal_size;i++) {
2272         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2273         if (size_of_constraint > 1) {
2274           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2275           /* find first untouched local node */
2276           j = 0;
2277           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2278           min_index = row_cmat_global_indices[j];
2279           min_loc = j;
2280           /* search the minimum among nodes not yet touched on the connected component
2281              since there can be more than one constraint on a single cc */
2282           for (j=1;j<size_of_constraint;j++) {
2283             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2284               min_index = row_cmat_global_indices[j];
2285               min_loc = j;
2286             }
2287           }
2288           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2289           constraints_index[n++] = row_cmat_indices[min_loc];
2290         }
2291         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2292       }
2293       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2294       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2295     }
2296   }
2297   *n_constraints = n;
2298   if (constraints_idx) *constraints_idx = constraints_index;
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 /* the next two functions has been adapted from pcis.c */
2303 #undef __FUNCT__
2304 #define __FUNCT__ "PCBDDCApplySchur"
2305 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2306 {
2307   PetscErrorCode ierr;
2308   PC_IS          *pcis = (PC_IS*)(pc->data);
2309 
2310   PetscFunctionBegin;
2311   if (!vec2_B) { vec2_B = v; }
2312   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2313   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2314   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2315   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2316   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2317   PetscFunctionReturn(0);
2318 }
2319 
2320 #undef __FUNCT__
2321 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2322 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2323 {
2324   PetscErrorCode ierr;
2325   PC_IS          *pcis = (PC_IS*)(pc->data);
2326 
2327   PetscFunctionBegin;
2328   if (!vec2_B) { vec2_B = v; }
2329   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2330   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2331   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2332   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2333   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 #undef __FUNCT__
2338 #define __FUNCT__ "PCBDDCSubsetNumbering"
2339 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2340 {
2341   Vec            local_vec,global_vec;
2342   IS             seqis,paris;
2343   VecScatter     scatter_ctx;
2344   PetscScalar    *array;
2345   PetscInt       *temp_global_dofs;
2346   PetscScalar    globalsum;
2347   PetscInt       i,j,s;
2348   PetscInt       nlocals,first_index,old_index,max_local;
2349   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2350   PetscMPIInt    *dof_sizes,*dof_displs;
2351   PetscBool      first_found;
2352   PetscErrorCode ierr;
2353 
2354   PetscFunctionBegin;
2355   /* mpi buffers */
2356   MPI_Comm_size(comm,&size_prec_comm);
2357   MPI_Comm_rank(comm,&rank_prec_comm);
2358   j = ( !rank_prec_comm ? size_prec_comm : 0);
2359   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
2360   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
2361   /* get maximum size of subset */
2362   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
2363   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2364   max_local = 0;
2365   if (n_local_dofs) {
2366     max_local = temp_global_dofs[0];
2367     for (i=1;i<n_local_dofs;i++) {
2368       if (max_local < temp_global_dofs[i] ) {
2369         max_local = temp_global_dofs[i];
2370       }
2371     }
2372   }
2373   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2374   max_global++;
2375   max_local = 0;
2376   if (n_local_dofs) {
2377     max_local = local_dofs[0];
2378     for (i=1;i<n_local_dofs;i++) {
2379       if (max_local < local_dofs[i] ) {
2380         max_local = local_dofs[i];
2381       }
2382     }
2383   }
2384   max_local++;
2385   /* allocate workspace */
2386   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2387   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2388   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2389   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2390   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2391   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2392   /* create scatter */
2393   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2394   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2395   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2396   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2397   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2398   /* init array */
2399   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2400   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2401   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2402   if (local_dofs_mult) {
2403     for (i=0;i<n_local_dofs;i++) {
2404       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2405     }
2406   } else {
2407     for (i=0;i<n_local_dofs;i++) {
2408       array[local_dofs[i]]=1.0;
2409     }
2410   }
2411   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2412   /* scatter into global vec and get total number of global dofs */
2413   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2414   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2415   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2416   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2417   /* Fill global_vec with cumulative function for global numbering */
2418   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2419   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2420   nlocals = 0;
2421   first_index = -1;
2422   first_found = PETSC_FALSE;
2423   for (i=0;i<s;i++) {
2424     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2425       first_found = PETSC_TRUE;
2426       first_index = i;
2427     }
2428     nlocals += (PetscInt)PetscRealPart(array[i]);
2429   }
2430   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2431   if (!rank_prec_comm) {
2432     dof_displs[0]=0;
2433     for (i=1;i<size_prec_comm;i++) {
2434       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2435     }
2436   }
2437   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2438   if (first_found) {
2439     array[first_index] += (PetscScalar)nlocals;
2440     old_index = first_index;
2441     for (i=first_index+1;i<s;i++) {
2442       if (PetscRealPart(array[i]) > 0.0) {
2443         array[i] += array[old_index];
2444         old_index = i;
2445       }
2446     }
2447   }
2448   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2449   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2450   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2451   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2452   /* get global ordering of local dofs */
2453   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2454   if (local_dofs_mult) {
2455     for (i=0;i<n_local_dofs;i++) {
2456       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2457     }
2458   } else {
2459     for (i=0;i<n_local_dofs;i++) {
2460       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2461     }
2462   }
2463   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2464   /* free workspace */
2465   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2466   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2467   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2468   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2469   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2470   /* return pointer to global ordering of local dofs */
2471   *global_numbering_subset = temp_global_dofs;
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 #undef __FUNCT__
2476 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2477 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2478 {
2479   PetscInt       i,j;
2480   PetscScalar    *alphas;
2481   PetscErrorCode ierr;
2482 
2483   PetscFunctionBegin;
2484   /* this implements stabilized Gram-Schmidt */
2485   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
2486   for (i=0;i<n;i++) {
2487     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2488     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2489     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2490   }
2491   ierr = PetscFree(alphas);CHKERRQ(ierr);
2492   PetscFunctionReturn(0);
2493 }
2494 
2495 /* TODO
2496    - now preallocation is done assuming SEQDENSE local matrices
2497 */
2498 #undef __FUNCT__
2499 #define __FUNCT__ "MatISGetMPIXAIJ"
2500 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M)
2501 {
2502   Mat                    new_mat;
2503   Mat_IS                 *matis = (Mat_IS*)(mat->data);
2504   /* info on mat */
2505   /* ISLocalToGlobalMapping rmapping,cmapping; */
2506   PetscInt               bs,rows,cols;
2507   PetscInt               lrows,lcols;
2508   PetscInt               local_rows,local_cols;
2509   PetscBool              isdense;
2510   /* values insertion */
2511   PetscScalar            *array;
2512   PetscInt               *local_indices,*global_indices;
2513   /* work */
2514   PetscInt               i,j,index_row;
2515   PetscErrorCode         ierr;
2516 
2517   PetscFunctionBegin;
2518   /* MISSING CHECKS
2519     - rectangular case not covered (it is not allowed by MATIS)
2520   */
2521   /* get info from mat */
2522   /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */
2523   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2524   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2525   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2526 
2527   /* work */
2528   ierr = PetscMalloc1(local_rows,&local_indices);CHKERRQ(ierr);
2529   for (i=0;i<local_rows;i++) local_indices[i]=i;
2530   /* map indices of local mat to global values */
2531   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2532   /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */
2533   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2534 
2535   if (reuse==MAT_INITIAL_MATRIX) {
2536     Vec         vec_dnz,vec_onz;
2537     PetscScalar *my_dnz,*my_onz;
2538     PetscInt    *dnz,*onz,*mat_ranges,*row_ownership;
2539     PetscInt    index_col,owner;
2540     PetscMPIInt nsubdomains;
2541 
2542     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2543     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2544     ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2545     ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2546     ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr);
2547     ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2548     ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2549 
2550     /*
2551       preallocation
2552     */
2553 
2554     ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2555     /*
2556        Some vectors are needed to sum up properly on shared interface dofs.
2557        Preallocation macros cannot do the job.
2558        Note that preallocation is not exact, since it overestimates nonzeros
2559     */
2560     ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2561     /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */
2562     ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr);
2563     ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2564     /* All processes need to compute entire row ownership */
2565     ierr = PetscMalloc1(rows,&row_ownership);CHKERRQ(ierr);
2566     ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2567     for (i=0;i<nsubdomains;i++) {
2568       for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2569         row_ownership[j]=i;
2570       }
2571     }
2572 
2573     /*
2574        my_dnz and my_onz contains exact contribution to preallocation from each local mat
2575        then, they will be summed up properly. This way, preallocation is always sufficient
2576     */
2577     ierr = PetscMalloc1(local_rows,&my_dnz);CHKERRQ(ierr);
2578     ierr = PetscMalloc1(local_rows,&my_onz);CHKERRQ(ierr);
2579     ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2580     ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2581     for (i=0;i<local_rows;i++) {
2582       index_row = global_indices[i];
2583       for (j=i;j<local_rows;j++) {
2584         owner = row_ownership[index_row];
2585         index_col = global_indices[j];
2586         if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2587           my_dnz[i] += 1.0;
2588         } else { /* offdiag block */
2589           my_onz[i] += 1.0;
2590         }
2591         /* same as before, interchanging rows and cols */
2592         if (i != j) {
2593           owner = row_ownership[index_col];
2594           if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2595             my_dnz[j] += 1.0;
2596           } else {
2597             my_onz[j] += 1.0;
2598           }
2599         }
2600       }
2601     }
2602     ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2603     ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2604     if (local_rows) { /* multilevel guard */
2605       ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2606       ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2607     }
2608     ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2609     ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2610     ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2611     ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2612     ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2613     ierr = PetscFree(my_onz);CHKERRQ(ierr);
2614     ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2615 
2616     /* set computed preallocation in dnz and onz */
2617     ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2618     for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2619     ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2620     ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2621     for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2622     ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2623     ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2624     ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2625 
2626     /* Resize preallocation if overestimated */
2627     for (i=0;i<lrows;i++) {
2628       dnz[i] = PetscMin(dnz[i],lcols);
2629       onz[i] = PetscMin(onz[i],cols-lcols);
2630     }
2631     /* set preallocation */
2632     ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2633     for (i=0;i<lrows/bs;i++) {
2634       dnz[i] = dnz[i*bs]/bs;
2635       onz[i] = onz[i*bs]/bs;
2636     }
2637     ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2638     for (i=0;i<lrows/bs;i++) {
2639       dnz[i] = dnz[i]-i;
2640     }
2641     ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2642     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2643     *M = new_mat;
2644   } else {
2645     PetscInt mbs,mrows,mcols;
2646     /* some checks */
2647     ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr);
2648     ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr);
2649     if (mrows != rows) {
2650       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows);
2651     }
2652     if (mrows != rows) {
2653       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols);
2654     }
2655     if (mbs != bs) {
2656       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs);
2657     }
2658     ierr = MatZeroEntries(*M);CHKERRQ(ierr);
2659   }
2660   /* set local to global mappings */
2661   /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */
2662   /* Set values */
2663   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2664   if (isdense) { /* special case for dense local matrices */
2665     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2666     ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr);
2667     ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2668     ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr);
2669     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2670     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2671   } else { /* very basic values insertion for all other matrix types */
2672     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2673     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2674     for (i=0;i<local_rows;i++) {
2675       ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2676       /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */
2677       ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2678       ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2679       ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2680       ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2681     }
2682   }
2683   ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2684   ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2685   if (isdense) {
2686     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2687   }
2688   PetscFunctionReturn(0);
2689 }
2690 
2691 #undef __FUNCT__
2692 #define __FUNCT__ "MatISSubassemble_Private"
2693 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2694 {
2695   Mat             subdomain_adj;
2696   IS              new_ranks,ranks_send_to;
2697   MatPartitioning partitioner;
2698   Mat_IS          *matis;
2699   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2700   PetscInt        prank;
2701   PetscMPIInt     size,rank,color;
2702   PetscInt        *xadj,*adjncy,*oldranks;
2703   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2704   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2705   PetscErrorCode  ierr;
2706   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2707   PetscSubcomm    subcomm;
2708 
2709   PetscFunctionBegin;
2710   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2711   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2712   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2713 
2714   /* Get info on mapping */
2715   matis = (Mat_IS*)(mat->data);
2716   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2717   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2718 
2719   /* build local CSR graph of subdomains' connectivity */
2720   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
2721   xadj[0] = 0;
2722   xadj[1] = PetscMax(n_neighs-1,0);
2723   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
2724   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
2725 
2726   if (threshold) {
2727     PetscInt* count,min_threshold;
2728     ierr = PetscMalloc1(local_size,&count);CHKERRQ(ierr);
2729     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2730     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2731       for (j=0;j<n_shared[i];j++) {
2732         count[shared[i][j]] += 1;
2733       }
2734     }
2735     /* adapt threshold since we dont want to lose significant connections */
2736     min_threshold = n_neighs;
2737     for (i=1;i<n_neighs;i++) {
2738       for (j=0;j<n_shared[i];j++) {
2739         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2740       }
2741     }
2742     threshold = PetscMax(min_threshold+1,threshold);
2743     xadj[1] = 0;
2744     for (i=1;i<n_neighs;i++) {
2745       for (j=0;j<n_shared[i];j++) {
2746         if (count[shared[i][j]] < threshold) {
2747           adjncy[xadj[1]] = neighs[i];
2748           adjncy_wgt[xadj[1]] = n_shared[i];
2749           xadj[1]++;
2750           break;
2751         }
2752       }
2753     }
2754     ierr = PetscFree(count);CHKERRQ(ierr);
2755   } else {
2756     if (xadj[1]) {
2757       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2758       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2759     }
2760   }
2761   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2762   if (use_square) {
2763     for (i=0;i<xadj[1];i++) {
2764       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2765     }
2766   }
2767   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2768 
2769   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2770 
2771   /*
2772     Restrict work on active processes only.
2773   */
2774   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2775   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2776   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2777   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2778   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2779   if (color) {
2780     ierr = PetscFree(xadj);CHKERRQ(ierr);
2781     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2782     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2783   } else {
2784     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2785     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
2786     prank = rank;
2787     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2788     for (i=0;i<size;i++) {
2789       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2790     }
2791     for (i=0;i<xadj[1];i++) {
2792       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2793     }
2794     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2795     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2796     n_subdomains = (PetscInt)size;
2797     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2798 
2799     /* Partition */
2800     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2801     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2802     if (use_vwgt) {
2803       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2804       v_wgt[0] = local_size;
2805       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2806     }
2807     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2808     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2809     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2810     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2811     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2812 
2813     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2814     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2815     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2816     /* clean up */
2817     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2818     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2819     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2820     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2821   }
2822   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2823 
2824   /* assemble parallel IS for sends */
2825   i = 1;
2826   if (color) i=0;
2827   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2828   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2829 
2830   /* get back IS */
2831   *is_sends = ranks_send_to;
2832   PetscFunctionReturn(0);
2833 }
2834 
2835 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2836 
2837 #undef __FUNCT__
2838 #define __FUNCT__ "MatISSubassemble"
2839 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2840 {
2841   Mat                    local_mat,new_mat;
2842   Mat_IS                 *matis;
2843   IS                     is_sends_internal;
2844   PetscInt               rows,cols;
2845   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2846   PetscBool              ismatis,isdense;
2847   ISLocalToGlobalMapping l2gmap;
2848   PetscInt*              l2gmap_indices;
2849   const PetscInt*        is_indices;
2850   MatType                new_local_type;
2851   MatTypePrivate         new_local_type_private;
2852   /* buffers */
2853   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2854   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2855   /* MPI */
2856   MPI_Comm               comm;
2857   PetscMPIInt            n_sends,n_recvs,commsize;
2858   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2859   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2860   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2861   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2862   PetscErrorCode         ierr;
2863 
2864   PetscFunctionBegin;
2865   /* checks */
2866   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2867   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2868   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2869   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2870   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2871   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2872   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2873   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2874   PetscValidLogicalCollectiveInt(mat,bs,0);
2875   /* prepare IS for sending if not provided */
2876   if (!is_sends) {
2877     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2878     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2879   } else {
2880     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2881     is_sends_internal = is_sends;
2882   }
2883 
2884   /* get pointer of MATIS data */
2885   matis = (Mat_IS*)mat->data;
2886 
2887   /* get comm */
2888   comm = PetscObjectComm((PetscObject)mat);
2889 
2890   /* compute number of sends */
2891   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2892   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2893 
2894   /* compute number of receives */
2895   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2896   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
2897   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2898   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2899   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2900   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2901   ierr = PetscFree(iflags);CHKERRQ(ierr);
2902 
2903   /* prepare send/receive buffers */
2904   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
2905   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2906   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
2907   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2908 
2909   /* Get data from local mat */
2910   if (!isdense) {
2911     /* TODO: See below some guidelines on how to prepare the local buffers */
2912     /*
2913        send_buffer_vals should contain the raw values of the local matrix
2914        send_buffer_idxs should contain:
2915        - MatType_PRIVATE type
2916        - PetscInt        size_of_l2gmap
2917        - PetscInt        global_row_indices[size_of_l2gmap]
2918        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2919     */
2920   } else {
2921     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2922     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2923     ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr);
2924     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2925     send_buffer_idxs[1] = i;
2926     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2927     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2928     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2929     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2930     for (i=0;i<n_sends;i++) {
2931       ilengths_vals[is_indices[i]] = len*len;
2932       ilengths_idxs[is_indices[i]] = len+2;
2933     }
2934   }
2935   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2936   buf_size_idxs = 0;
2937   buf_size_vals = 0;
2938   for (i=0;i<n_recvs;i++) {
2939     buf_size_idxs += (PetscInt)olengths_idxs[i];
2940     buf_size_vals += (PetscInt)olengths_vals[i];
2941   }
2942   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
2943   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
2944 
2945   /* get new tags for clean communications */
2946   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2947   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2948 
2949   /* allocate for requests */
2950   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
2951   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
2952   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
2953   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
2954 
2955   /* communications */
2956   ptr_idxs = recv_buffer_idxs;
2957   ptr_vals = recv_buffer_vals;
2958   for (i=0;i<n_recvs;i++) {
2959     source_dest = onodes[i];
2960     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2961     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2962     ptr_idxs += olengths_idxs[i];
2963     ptr_vals += olengths_vals[i];
2964   }
2965   for (i=0;i<n_sends;i++) {
2966     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2967     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2968     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2969   }
2970   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2971   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2972 
2973   /* assemble new l2g map */
2974   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2975   ptr_idxs = recv_buffer_idxs;
2976   buf_size_idxs = 0;
2977   for (i=0;i<n_recvs;i++) {
2978     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2979     ptr_idxs += olengths_idxs[i];
2980   }
2981   ierr = PetscMalloc1(buf_size_idxs,&l2gmap_indices);CHKERRQ(ierr);
2982   ptr_idxs = recv_buffer_idxs;
2983   buf_size_idxs = 0;
2984   for (i=0;i<n_recvs;i++) {
2985     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2986     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2987     ptr_idxs += olengths_idxs[i];
2988   }
2989   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2990   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2991   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2992 
2993   /* infer new local matrix type from received local matrices type */
2994   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2995   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2996   new_local_type_private = MATAIJ_PRIVATE;
2997   new_local_type = MATSEQAIJ;
2998   if (n_recvs) {
2999     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3000     ptr_idxs = recv_buffer_idxs;
3001     for (i=0;i<n_recvs;i++) {
3002       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3003         new_local_type_private = MATAIJ_PRIVATE;
3004         break;
3005       }
3006       ptr_idxs += olengths_idxs[i];
3007     }
3008     switch (new_local_type_private) {
3009       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
3010         new_local_type = MATSEQAIJ;
3011         bs = 1;
3012         break;
3013       case MATAIJ_PRIVATE:
3014         new_local_type = MATSEQAIJ;
3015         bs = 1;
3016         break;
3017       case MATBAIJ_PRIVATE:
3018         new_local_type = MATSEQBAIJ;
3019         break;
3020       case MATSBAIJ_PRIVATE:
3021         new_local_type = MATSEQSBAIJ;
3022         break;
3023       default:
3024         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3025         break;
3026     }
3027   }
3028 
3029   /* create MATIS object */
3030   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3031   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
3032   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3033   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
3034   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3035   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
3036 
3037   /* set values */
3038   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3039   ptr_vals = recv_buffer_vals;
3040   ptr_idxs = recv_buffer_idxs;
3041   for (i=0;i<n_recvs;i++) {
3042     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3043       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3044       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3045       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3046       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3047       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3048     }
3049     ptr_idxs += olengths_idxs[i];
3050     ptr_vals += olengths_vals[i];
3051   }
3052   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3053   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3054   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3055   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3056 
3057   { /* check */
3058     Vec       lvec,rvec;
3059     PetscReal infty_error;
3060 
3061     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3062     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3063     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3064     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3065     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
3066     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3067     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3068     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3069     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3070   }
3071 
3072   /* free workspace */
3073   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3074   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3075   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3076   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3077   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3078   if (isdense) {
3079     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3080     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3081   } else {
3082     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3083   }
3084   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3085   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3086   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3087   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3088   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3089   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3090   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3091   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3092   ierr = PetscFree(onodes);CHKERRQ(ierr);
3093   /* get back new mat */
3094   *mat_n = new_mat;
3095   PetscFunctionReturn(0);
3096 }
3097 
3098 #undef __FUNCT__
3099 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3100 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3101 {
3102   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3103   PC_IS                  *pcis = (PC_IS*)pc->data;
3104   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3105   MatNullSpace           CoarseNullSpace=NULL;
3106   ISLocalToGlobalMapping coarse_islg;
3107   IS                     coarse_is;
3108   PetscInt               max_it;
3109   PetscInt               im_active=-1,active_procs=-1;
3110   PC                     pc_temp;
3111   PCType                 coarse_pc_type;
3112   KSPType                coarse_ksp_type;
3113   PetscBool              multilevel_requested,multilevel_allowed;
3114   PetscBool              setsym,issym,isbddc,isnn,coarse_reuse;
3115   PetscErrorCode         ierr;
3116 
3117   PetscFunctionBegin;
3118   /* Assign global numbering to coarse dofs */
3119   if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */
3120     PetscInt ocoarse_size;
3121     ocoarse_size = pcbddc->coarse_size;
3122     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3123     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3124     /* see if we can avoid some work */
3125     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3126       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3127         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3128         coarse_reuse = PETSC_FALSE;
3129       } else { /* we can safely reuse already computed coarse matrix */
3130         coarse_reuse = PETSC_TRUE;
3131       }
3132     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3133       coarse_reuse = PETSC_FALSE;
3134     }
3135   } else { /* primal space has not been changed, so we can reuse coarse matrix */
3136     coarse_reuse = PETSC_TRUE;
3137   }
3138 
3139   /* infer some info from user */
3140   issym = PETSC_FALSE;
3141   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
3142   multilevel_allowed = PETSC_FALSE;
3143   multilevel_requested = PETSC_FALSE;
3144   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
3145   if (multilevel_requested) {
3146     /* count "active processes" */
3147     im_active = !!(pcis->n);
3148     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3149     if (active_procs/pcbddc->coarsening_ratio < 2) {
3150       multilevel_allowed = PETSC_FALSE;
3151     } else {
3152       multilevel_allowed = PETSC_TRUE;
3153     }
3154   }
3155 
3156   /* set defaults for coarse KSP and PC */
3157   if (multilevel_allowed) {
3158     if (issym) {
3159       coarse_ksp_type = KSPRICHARDSON;
3160     } else {
3161       coarse_ksp_type = KSPCHEBYSHEV;
3162     }
3163     coarse_pc_type = PCBDDC;
3164   } else {
3165     coarse_ksp_type = KSPPREONLY;
3166     coarse_pc_type = PCREDUNDANT;
3167   }
3168 
3169   /* create the coarse KSP object only once with defaults */
3170   if (!pcbddc->coarse_ksp) {
3171     char prefix[256],str_level[3];
3172     size_t len;
3173     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3174     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3175     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3176     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3177     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3178     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3179     /* prefix */
3180     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3181     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3182     if (!pcbddc->current_level) {
3183       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3184       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3185     } else {
3186       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3187       if (pcbddc->current_level>1) len -= 2;
3188       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3189       *(prefix+len)='\0';
3190       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3191       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3192     }
3193     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3194   }
3195   /* allow user customization */
3196   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3197   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3198   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3199 
3200   /* get some info after set from options */
3201   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3202   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3203   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3204   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3205     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3206     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3207     isbddc = PETSC_FALSE;
3208   }
3209 
3210   /* propagate BDDC info to the next level */
3211   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3212   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3213   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3214 
3215   /* creates temporary MATIS object for coarse matrix */
3216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3217   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3218   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3219   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3220   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3221   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3222   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3223   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3224   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3225 
3226   /* assemble coarse matrix */
3227   if (isbddc || isnn) {
3228     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
3229   } else {
3230     if (coarse_reuse) {
3231       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
3232       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3233       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr);
3234     } else {
3235       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3236     }
3237   }
3238   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3239 
3240   /* create local to global scatters for coarse problem */
3241   if (pcbddc->new_primal_space) {
3242     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3243     ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
3244     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3245     ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3246     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3247   }
3248   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3249 
3250   /* propagate symmetry info to coarse matrix */
3251   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3252 
3253   /* Compute coarse null space (special handling by BDDC only) */
3254   if (pcbddc->NullSpace) {
3255     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3256     if (isbddc) {
3257       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3258     } else {
3259       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3260     }
3261   }
3262 
3263   /* set operators */
3264   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3265 
3266   /* additional KSP customization */
3267   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3268   if (max_it < 5) {
3269     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3270   }
3271   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
3272 
3273 
3274   /* print some info if requested */
3275   if (pcbddc->dbg_flag) {
3276     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3277     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3278     if (!multilevel_allowed) {
3279       if (multilevel_requested) {
3280         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3281       } else if (pcbddc->max_levels) {
3282         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3283       }
3284     }
3285     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3286     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3287   }
3288 
3289   /* setup coarse ksp */
3290   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3291   if (pcbddc->dbg_flag) {
3292     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3293     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3294     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3295     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3296   }
3297 
3298   /* Check coarse problem if requested */
3299   if (pcbddc->dbg_flag) {
3300     KSP       check_ksp;
3301     KSPType   check_ksp_type;
3302     PC        check_pc;
3303     Vec       check_vec;
3304     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3305     PetscInt  its;
3306     PetscBool ispreonly,compute;
3307 
3308     /* Create ksp object suitable for estimation of extreme eigenvalues */
3309     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3310     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3311     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3312     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3313     if (ispreonly) {
3314       check_ksp_type = KSPPREONLY;
3315       compute = PETSC_FALSE;
3316     } else {
3317       if (issym) check_ksp_type = KSPCG;
3318       else check_ksp_type = KSPGMRES;
3319       compute = PETSC_TRUE;
3320     }
3321     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3322     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3323     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3324     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3325     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3326     /* create random vec */
3327     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3328     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3329     if (CoarseNullSpace) {
3330       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3331     }
3332     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3333     /* solve coarse problem */
3334     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3335     if (CoarseNullSpace) {
3336       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3337     }
3338     /* check coarse problem residual error */
3339     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3340     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3341     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3342     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3343     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3344     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3345     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3346     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3347     /* get eigenvalue estimation if preonly has not been requested */
3348     if (!ispreonly) {
3349       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3350       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3351       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3352     }
3353     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3354     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3355   }
3356   /* free memory */
3357   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3358   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3359   PetscFunctionReturn(0);
3360 }
3361 
3362 #undef __FUNCT__
3363 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3364 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3365 {
3366   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3367   PC_IS*         pcis = (PC_IS*)pc->data;
3368   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3369   PetscInt       i,coarse_size;
3370   PetscInt       *local_primal_indices;
3371   PetscErrorCode ierr;
3372 
3373   PetscFunctionBegin;
3374   /* Compute global number of coarse dofs */
3375   if (!pcbddc->primal_indices_local_idxs) {
3376     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
3377   }
3378   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3379 
3380   /* check numbering */
3381   if (pcbddc->dbg_flag) {
3382     PetscScalar coarsesum,*array;
3383 
3384     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3385     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3386     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3387     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
3388     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3389     for (i=0;i<pcbddc->local_primal_size;i++) {
3390       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3391     }
3392     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3393     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3394     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3395     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3396     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3397     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3398     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3399     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3400     for (i=0;i<pcis->n;i++) {
3401       if (array[i] == 1.0) {
3402         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3403       }
3404     }
3405     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3406     for (i=0;i<pcis->n;i++) {
3407       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3408     }
3409     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3410     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3411     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3412     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3413     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3414     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3415     if (pcbddc->dbg_flag > 1) {
3416       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3417       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3418       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3419       for (i=0;i<pcbddc->local_primal_size;i++) {
3420         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
3421       }
3422       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3423     }
3424     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3425   }
3426   /* get back data */
3427   *coarse_size_n = coarse_size;
3428   *local_primal_indices_n = local_primal_indices;
3429   PetscFunctionReturn(0);
3430 }
3431 
3432