xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision cc85fe4ded5189db5e5e073ce90ef04de0003fdb)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCResetCustomization"
47 PetscErrorCode PCBDDCResetCustomization(PC pc)
48 {
49   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
50   PetscInt       i;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
56   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
59   for (i=0;i<pcbddc->n_ISForDofs;i++) {
60     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
61   }
62   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "PCBDDCResetTopography"
68 PetscErrorCode PCBDDCResetTopography(PC pc)
69 {
70   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
75   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
76   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "PCBDDCResetSolvers"
82 PetscErrorCode PCBDDCResetSolvers(PC pc)
83 {
84   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
89   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
90   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
94   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
101   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "PCBDDCCreateWorkVectors"
110 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
111 {
112   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
113   PC_IS          *pcis = (PC_IS*)pc->data;
114   VecType        impVecType;
115   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
116   PetscErrorCode ierr;
117 
118   PetscFunctionBegin;
119   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
120   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
121   local_primal_size = n_constraints+n_vertices;
122   n_R = pcis->n-n_vertices;
123   /* local work vectors */
124   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
125   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
126   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
127   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
128   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
129   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
130   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
131   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
132   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
133   if (n_constraints) {
134     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
135     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
136     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
137   }
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "PCBDDCSetUpCorrection"
143 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
144 {
145   PetscErrorCode         ierr;
146   /* pointers to pcis and pcbddc */
147   PC_IS*                 pcis = (PC_IS*)pc->data;
148   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
149   /* submatrices of local problem */
150   Mat                    A_RV,A_VR,A_VV;
151   /* working matrices */
152   Mat                    M1,M2,M3,C_CR;
153   /* working vectors */
154   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
155   /* additional working stuff */
156   IS                     is_aux;
157   ISLocalToGlobalMapping BtoNmap;
158   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
159   const PetscScalar      *array,*row_cmat_values;
160   const PetscInt         *row_cmat_indices,*idx_R_local;
161   PetscInt               *vertices,*idx_V_B,*auxindices;
162   PetscInt               n_vertices,n_constraints,size_of_constraint;
163   PetscInt               i,j,n_R,n_D,n_B;
164   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
165   /* Vector and matrix types */
166   VecType                impVecType;
167   MatType                impMatType;
168   /* some shortcuts to scalars */
169   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
170   /* for debugging purposes */
171   PetscReal              *coarsefunctions_errors,*constraints_errors;
172 
173   PetscFunctionBegin;
174   /* get number of vertices and their local indices */
175   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
176   n_constraints = pcbddc->local_primal_size-n_vertices;
177   /* Set Non-overlapping dimensions */
178   n_B = pcis->n_B; n_D = pcis->n - n_B;
179   n_R = pcis->n-n_vertices;
180 
181   /* Set types for local objects needed by BDDC precondtioner */
182   impMatType = MATSEQDENSE;
183   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
184 
185   /* Allocating some extra storage just to be safe */
186   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
187   for (i=0;i<pcis->n;i++) auxindices[i]=i;
188 
189   /* vertices in boundary numbering */
190   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
191   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
192   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
193   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
194   if (i != n_vertices) {
195     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
196   }
197 
198   /* some work vectors on vertices and/or constraints */
199   if (n_vertices) {
200     ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr);
201     ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr);
202     ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr);
203     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
204   }
205   if (n_constraints) {
206     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
207     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
208   }
209 
210   /* Precompute stuffs needed for preprocessing and application of BDDC*/
211   if (n_constraints) {
212     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
213     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
214     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
215     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
216 
217     /* Extract constraints on R nodes: C_{CR}  */
218     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
219     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
220     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
221 
222     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
223     for (i=0;i<n_constraints;i++) {
224       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
225       /* Get row of constraint matrix in R numbering */
226       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
227       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
228       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
229       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
230       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
231       /* Solve for row of constraint matrix in R numbering */
232       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
233       /* Set values in local_auxmat2 */
234       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
235       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
236       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
237     }
238     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
239     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
240     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
241 
242     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
243     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
244     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
245     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
246     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
247     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
248     ierr = MatSetUp(M1);CHKERRQ(ierr);
249     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
250     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
251     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
252     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
253     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
254     ierr = MatDestroy(&M2);CHKERRQ(ierr);
255     ierr = MatDestroy(&M3);CHKERRQ(ierr);
256     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
257     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
258   }
259 
260   /* Get submatrices from subdomain matrix */
261   if (n_vertices) {
262     PetscInt ibs,iibs,mbs;
263     ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr);
264     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
265     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
266     ierr = ISGetBlockSize(is_aux,&iibs);CHKERRQ(ierr);
267     if (ibs != mbs || iibs != mbs) { /* TODO BAIJ */
268       Mat newmat;
269       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
270       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
271       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
272       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
273       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
274     } else {
275       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
276       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
277       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
278     }
279     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
280   }
281 
282   /* Matrix of coarse basis functions (local) */
283   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
284   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
285   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
286   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
287   if (pcbddc->switch_static || pcbddc->dbg_flag) {
288     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
289     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
290     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
291     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
292   }
293 
294   if (pcbddc->dbg_flag) {
295     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
296     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
297     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
298   }
299   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
300   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
301 
302   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
303 
304   /* vertices */
305   for (i=0;i<n_vertices;i++) {
306     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
307     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
308     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
309     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
310     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
311     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
312     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
313     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
314     if (n_constraints) {
315       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
316       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
317       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
318     }
319     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
320     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
321 
322     /* Set values in coarse basis function and subdomain part of coarse_mat */
323     /* coarse basis functions */
324     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
325     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
326     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
327     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
328     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
329     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
330     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
331     if (pcbddc->switch_static || pcbddc->dbg_flag) {
332       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
333       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
334       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
335       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
336       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
337     }
338     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
339     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
340     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
341     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
342     if (n_constraints) {
343       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
344       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
345       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
346     }
347 
348     /* check */
349     if (pcbddc->dbg_flag) {
350       /* assemble subdomain vector on local nodes */
351       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
352       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
353       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
354       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
355       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
356       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
357       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
358       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
359       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
360       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
361       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
362       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
363       if (n_constraints) {
364         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
365         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
366         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
367       }
368       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
369       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
370       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
371       /* check saddle point solution */
372       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
373       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
374       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
375       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
376       /* shift by the identity matrix */
377       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
378       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
379       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
380       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
381     }
382   }
383 
384   /* constraints */
385   for (i=0;i<n_constraints;i++) {
386     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
387     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
388     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
389     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
390     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
391     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
392     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
393     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
394     if (n_vertices) {
395       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
396     }
397     /* Set values in coarse basis function and subdomain part of coarse_mat */
398     /* coarse basis functions */
399     j = i+n_vertices; /* don't touch this! */
400     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
401     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
402     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
403     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
404     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
405     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
406     if (pcbddc->switch_static || pcbddc->dbg_flag) {
407       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
408       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
409       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
410       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
411       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
412     }
413     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
414     if (n_vertices) {
415       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
416       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
417       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
418     }
419     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
420     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
421     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
422 
423     if (pcbddc->dbg_flag) {
424       /* assemble subdomain vector on nodes */
425       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
426       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
427       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
428       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
429       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
430       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
431       /* assemble subdomain vector of lagrange multipliers */
432       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
433       if (n_vertices) {
434         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
435         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
436         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
437       }
438       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
439       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
440       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
441       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
442       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
443       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
444       /* check saddle point solution */
445       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
446       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
447       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
448       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
449       /* shift by the identity matrix */
450       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
451       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
452       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
453       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
454     }
455   }
456   /* call assembling routines for local coarse basis */
457   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
458   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
459   if (pcbddc->switch_static || pcbddc->dbg_flag) {
460     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
461     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
462   }
463 
464   /* compute other basis functions for non-symmetric problems */
465   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
466   if (!setsym || (setsym && !issym)) {
467     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
468     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
469     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
470     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
471     if (pcbddc->switch_static || pcbddc->dbg_flag) {
472       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
473       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
474       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
475       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
476     }
477     for (i=0;i<pcbddc->local_primal_size;i++) {
478       if (n_constraints) {
479         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
480         for (j=0;j<n_constraints;j++) {
481           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
482         }
483         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
484         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
485       }
486       if (i<n_vertices) {
487         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
488         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
489         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
490         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
491         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
492         if (n_constraints) {
493           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
494         }
495       } else {
496         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
497       }
498       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
499       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
500       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
501       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
502       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
503       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
504       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
505       if (i<n_vertices) {
506         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
507       }
508       if (pcbddc->switch_static || pcbddc->dbg_flag) {
509         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
510         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
511         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
512         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
513         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
514       }
515 
516       if (pcbddc->dbg_flag) {
517         /* assemble subdomain vector on nodes */
518         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
519         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
520         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
521         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
522         if (i<n_vertices) {
523           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
524         }
525         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
526         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
527         /* assemble subdomain vector of lagrange multipliers */
528         for (j=0;j<pcbddc->local_primal_size;j++) {
529           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
530         }
531         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
532         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
533         /* check saddle point solution */
534         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
535         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
536         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
537         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
538         /* shift by the identity matrix */
539         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
540         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
541         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
542         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
543       }
544     }
545     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
546     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
547     if (pcbddc->switch_static || pcbddc->dbg_flag) {
548       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
549       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
550     }
551   }
552   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
553   /* Checking coarse_sub_mat and coarse basis functios */
554   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
555   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
556   if (pcbddc->dbg_flag) {
557     Mat         coarse_sub_mat;
558     Mat         AUXMAT,TM1,TM2,TM3,TM4;
559     Mat         coarse_phi_D,coarse_phi_B;
560     Mat         coarse_psi_D,coarse_psi_B;
561     Mat         A_II,A_BB,A_IB,A_BI;
562     MatType     checkmattype=MATSEQAIJ;
563     PetscReal   real_value;
564 
565     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
566     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
567     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
568     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
569     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
570     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
571     if (pcbddc->coarse_psi_B) {
572       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
573       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
574     }
575     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
576 
577     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
578     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
579     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
580     if (pcbddc->coarse_psi_B) {
581       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
582       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
583       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
584       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
585       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
586       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
587       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
588       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
589       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
590       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
591       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
592       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
593     } else {
594       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
595       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
596       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
597       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
598       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
599       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
600       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
601       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
602     }
603     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
604     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
605     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
606     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
607     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
608     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
609     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
610     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
611     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
612     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
613     for (i=0;i<pcbddc->local_primal_size;i++) {
614       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
615     }
616     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
617     for (i=0;i<pcbddc->local_primal_size;i++) {
618       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
619     }
620     if (pcbddc->coarse_psi_B) {
621       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
622       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
623         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
624       }
625       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
626       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
627         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
628       }
629     }
630     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
631     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
632     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
633     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
634     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
635     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
636     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
637     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
638     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
639     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
640     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
641     if (pcbddc->coarse_psi_B) {
642       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
643       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
644     }
645     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
646     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
647     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
648     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
649   }
650   /* free memory */
651   if (n_vertices) {
652     ierr = PetscFree(vertices);CHKERRQ(ierr);
653     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
654     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
655     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
656     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
657     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
658   }
659   if (n_constraints) {
660     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
661     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
662     ierr = MatDestroy(&M1);CHKERRQ(ierr);
663     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
664   }
665   ierr = PetscFree(auxindices);CHKERRQ(ierr);
666   /* get back data */
667   *coarse_submat_vals_n = coarse_submat_vals;
668   PetscFunctionReturn(0);
669 }
670 
671 #undef __FUNCT__
672 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
673 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
674 {
675   PC_IS*            pcis = (PC_IS*)(pc->data);
676   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
677   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
678   /* manage repeated solves */
679   MatReuse          reuse;
680   MatStructure      matstruct;
681   PetscErrorCode    ierr;
682 
683   PetscFunctionBegin;
684   /* get mat flags */
685   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
686   reuse = MAT_INITIAL_MATRIX;
687   if (pc->setupcalled) {
688     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
689     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
690     if (matstruct == SAME_NONZERO_PATTERN) {
691       reuse = MAT_REUSE_MATRIX;
692     } else {
693       reuse = MAT_INITIAL_MATRIX;
694     }
695   }
696   if (reuse == MAT_INITIAL_MATRIX) {
697     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
698     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
699     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
700     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
701     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
702   }
703 
704   /* transform local matrices if needed */
705   if (pcbddc->use_change_of_basis) {
706     Mat         change_mat_all;
707     PetscScalar *row_cmat_values;
708     PetscInt    *row_cmat_indices;
709     PetscInt    *nnz,*is_indices,*temp_indices;
710     PetscInt    i,j,k,n_D,n_B;
711 
712     /* Get Non-overlapping dimensions */
713     n_B = pcis->n_B;
714     n_D = pcis->n-n_B;
715 
716     /* compute nonzero structure of change of basis on all local nodes */
717     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
718     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
719     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
720     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
721     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
722     k=1;
723     for (i=0;i<n_B;i++) {
724       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
725       nnz[is_indices[i]]=j;
726       if (k < j) k = j;
727       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
728     }
729     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
730     /* assemble change of basis matrix on the whole set of local dofs */
731     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
732     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
733     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
734     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
735     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
736     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
737     for (i=0;i<n_D;i++) {
738       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
739     }
740     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
741     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
742     for (i=0;i<n_B;i++) {
743       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
744       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
745       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
746       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
747     }
748     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
749     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
750     /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */
751     ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr);
752     if (i==1) {
753       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
754     } else {
755       Mat work_mat;
756       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
757       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
758       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
759     }
760     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
761     ierr = PetscFree(nnz);CHKERRQ(ierr);
762     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
763   } else {
764     /* without change of basis, the local matrix is unchanged */
765     if (!pcbddc->local_mat) {
766       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
767       pcbddc->local_mat = matis->A;
768     }
769   }
770 
771   /* get submatrices */
772   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
773   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
774   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
775   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
776   PetscFunctionReturn(0);
777 }
778 
779 #undef __FUNCT__
780 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
781 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
782 {
783   PC_IS*         pcis = (PC_IS*)(pc->data);
784   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
785   IS             is_aux1,is_aux2;
786   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
787   PetscInt       n_vertices,n_constraints,i,j,n_R,n_D,n_B;
788   PetscBT        bitmask;
789   PetscErrorCode ierr;
790 
791   PetscFunctionBegin;
792   /* Set Non-overlapping dimensions */
793   n_B = pcis->n_B; n_D = pcis->n - n_B;
794   /* get vertex indices from constraint matrix */
795   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
796   /* Set number of constraints */
797   n_constraints = pcbddc->local_primal_size-n_vertices;
798   /* create auxiliary bitmask */
799   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
800   for (i=0;i<n_vertices;i++) {
801     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
802   }
803   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
804   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
805   for (i=0, n_R=0; i<pcis->n; i++) {
806     if (!PetscBTLookup(bitmask,i)) {
807       idx_R_local[n_R] = i;
808       n_R++;
809     }
810   }
811   ierr = PetscFree(vertices);CHKERRQ(ierr);
812   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr);
813 
814   /* print some info if requested */
815   if (pcbddc->dbg_flag) {
816     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
817     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
818     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
819     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
820     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr);
821     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
822     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
823   }
824 
825   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
826   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
827   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
828   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
829   for (i=0; i<n_D; i++) {
830     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
831   }
832   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
833   for (i=0, j=0; i<n_R; i++) {
834     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
835       aux_array1[j++] = i;
836     }
837   }
838   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
839   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
840   for (i=0, j=0; i<n_B; i++) {
841     if (!PetscBTLookup(bitmask,is_indices[i])) {
842       aux_array2[j++] = i;
843     }
844   }
845   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
846   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
847   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
848   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
849   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
850 
851   if (pcbddc->switch_static || pcbddc->dbg_flag) {
852     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
853     for (i=0, j=0; i<n_R; i++) {
854       if (PetscBTLookup(bitmask,idx_R_local[i])) {
855         aux_array1[j++] = i;
856       }
857     }
858     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
859     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
860     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
861   }
862   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
869 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
870 {
871   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
872   PC_IS          *pcis = (PC_IS*)pc->data;
873   PC             pc_temp;
874   Mat            A_RR;
875   Vec            vec1,vec2,vec3;
876   MatStructure   matstruct;
877   PetscScalar    m_one = -1.0;
878   PetscReal      value;
879   PetscInt       n_D,n_R,ibs,mbs;
880   PetscBool      use_exact,use_exact_reduced;
881   PetscErrorCode ierr;
882   /* prefixes stuff */
883   char           dir_prefix[256],neu_prefix[256],str_level[3];
884   size_t         len;
885 
886   PetscFunctionBegin;
887   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
888 
889   /* compute prefixes */
890   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
891   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
892   if (!pcbddc->current_level) {
893     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
894     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
895     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
896     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
897   } else {
898     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
899     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
900     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
901     len -= 15; /* remove "pc_bddc_coarse_" */
902     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
903     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
904     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
905     *(dir_prefix+len)='\0';
906     *(neu_prefix+len)='\0';
907     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
908     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
909     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
910     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
911   }
912 
913   /* DIRICHLET PROBLEM */
914   /* Matrix for Dirichlet problem is pcis->A_II */
915   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
916   if (!pcbddc->ksp_D) { /* create object if not yet build */
917     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
918     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
919     /* default */
920     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
921     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
922     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
923     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
924     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
925   }
926   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
927   /* Allow user's customization */
928   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
929   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
930   if (!n_D) {
931     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
932     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
933   }
934   /* Set Up KSP for Dirichlet problem of BDDC */
935   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
936   /* set ksp_D into pcis data */
937   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
938   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
939   pcis->ksp_D = pcbddc->ksp_D;
940 
941   /* NEUMANN PROBLEM */
942   /* Matrix for Neumann problem is A_RR -> we need to create it */
943   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
944   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
945   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
946   if (ibs != mbs) { /* TODO BAIJ: see if this can be avoided */
947     Mat newmat;
948     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
949     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
950     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
951   } else {
952     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
953   }
954   if (!pcbddc->ksp_R) { /* create object if not yet build */
955     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
956     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
957     /* default */
958     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
959     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
960     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
961     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
962     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
963   }
964   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
965   /* Allow user's customization */
966   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
967   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
968   if (!n_R) {
969     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
970     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
971   }
972   /* Set Up KSP for Neumann problem of BDDC */
973   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
974 
975   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
976 
977   /* Dirichlet */
978   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
979   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
980   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
981   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
982   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
983   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
984   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
985   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
986   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
987   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
988   /* need to be adapted? */
989   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
990   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
991   ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
992   /* print info */
993   if (pcbddc->dbg_flag) {
994     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
995     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
996     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
997     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
998     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
999   }
1000   if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1001     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1002   }
1003 
1004   /* Neumann */
1005   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
1006   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1007   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1008   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
1009   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
1010   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1011   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1012   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1013   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1014   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1015   /* need to be adapted? */
1016   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1017   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1018   /* print info */
1019   if (pcbddc->dbg_flag) {
1020     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1021     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1022   }
1023   if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1024     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1025   }
1026   /* free Neumann problem's matrix */
1027   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1028   PetscFunctionReturn(0);
1029 }
1030 
1031 #undef __FUNCT__
1032 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1033 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1034 {
1035   PetscErrorCode ierr;
1036   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1037 
1038   PetscFunctionBegin;
1039   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1040   if (pcbddc->local_auxmat1) {
1041     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1042     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1043   }
1044   PetscFunctionReturn(0);
1045 }
1046 
1047 #undef __FUNCT__
1048 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1049 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1050 {
1051   PetscErrorCode ierr;
1052   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1053   PC_IS*            pcis = (PC_IS*)  (pc->data);
1054   const PetscScalar zero = 0.0;
1055 
1056   PetscFunctionBegin;
1057   /* Application of PHI^T (or PSI^T)  */
1058   if (pcbddc->coarse_psi_B) {
1059     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1060     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1061   } else {
1062     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1063     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1064   }
1065   /* Scatter data of coarse_rhs */
1066   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1067   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1068 
1069   /* Local solution on R nodes */
1070   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1071   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1072   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1073   if (pcbddc->switch_static) {
1074     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1075     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1076   }
1077   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1078   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1079   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1080   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1081   if (pcbddc->switch_static) {
1082     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1083     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1084   }
1085 
1086   /* Coarse solution */
1087   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1088   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1089     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1090   }
1091   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1092   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1093 
1094   /* Sum contributions from two levels */
1095   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1096   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1102 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1103 {
1104   PetscErrorCode ierr;
1105   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1106 
1107   PetscFunctionBegin;
1108   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 #undef __FUNCT__
1113 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1114 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1115 {
1116   PetscErrorCode ierr;
1117   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1118 
1119   PetscFunctionBegin;
1120   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 /* uncomment for testing purposes */
1125 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1126 #undef __FUNCT__
1127 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1128 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1129 {
1130   PetscErrorCode    ierr;
1131   PC_IS*            pcis = (PC_IS*)(pc->data);
1132   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1133   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1134   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1135   MatType           impMatType=MATSEQAIJ;
1136   /* one and zero */
1137   PetscScalar       one=1.0,zero=0.0;
1138   /* space to store constraints and their local indices */
1139   PetscScalar       *temp_quadrature_constraint;
1140   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1141   /* iterators */
1142   PetscInt          i,j,k,total_counts,temp_start_ptr;
1143   /* stuff to store connected components stored in pcbddc->mat_graph */
1144   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1145   PetscInt          n_ISForFaces,n_ISForEdges;
1146   /* near null space stuff */
1147   MatNullSpace      nearnullsp;
1148   const Vec         *nearnullvecs;
1149   Vec               *localnearnullsp;
1150   PetscBool         nnsp_has_cnst;
1151   PetscInt          nnsp_size;
1152   PetscScalar       *array;
1153   /* BLAS integers */
1154   PetscBLASInt      lwork,lierr;
1155   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1156   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1157   /* LAPACK working arrays for SVD or POD */
1158   PetscBool         skip_lapack;
1159   PetscScalar       *work;
1160   PetscReal         *singular_vals;
1161 #if defined(PETSC_USE_COMPLEX)
1162   PetscReal         *rwork;
1163 #endif
1164 #if defined(PETSC_MISSING_LAPACK_GESVD)
1165   PetscBLASInt      Blas_one_2=1;
1166   PetscScalar       *temp_basis,*correlation_mat;
1167 #else
1168   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1169   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1170 #endif
1171   /* change of basis */
1172   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1173   PetscBool         boolforchange,qr_needed;
1174   PetscBT           touched,change_basis,qr_needed_idx;
1175   /* auxiliary stuff */
1176   PetscInt          *nnz,*is_indices,*local_to_B;
1177   /* some quantities */
1178   PetscInt          n_vertices,total_primal_vertices;
1179   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1180 
1181 
1182   PetscFunctionBegin;
1183   /* Get index sets for faces, edges and vertices from graph */
1184   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1185     pcbddc->use_vertices = PETSC_TRUE;
1186   }
1187   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1188   /* HACK: provide functions to set change of basis */
1189   if (!ISForVertices && pcbddc->NullSpace) {
1190     pcbddc->use_change_of_basis = PETSC_TRUE;
1191     pcbddc->use_change_on_faces = PETSC_FALSE;
1192   }
1193   /* print some info */
1194   if (pcbddc->dbg_flag) {
1195     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1196     i = 0;
1197     if (ISForVertices) {
1198       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1199     }
1200     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1201     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1202     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1203     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1204   }
1205   /* check if near null space is attached to global mat */
1206   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1207   if (nearnullsp) {
1208     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1209   } else { /* if near null space is not provided BDDC uses constants by default */
1210     nnsp_size = 0;
1211     nnsp_has_cnst = PETSC_TRUE;
1212   }
1213   /* get max number of constraints on a single cc */
1214   max_constraints = nnsp_size;
1215   if (nnsp_has_cnst) max_constraints++;
1216 
1217   /*
1218        Evaluate maximum storage size needed by the procedure
1219        - temp_indices will contain start index of each constraint stored as follows
1220        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1221        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1222        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1223                                                                                                                                                          */
1224   total_counts = n_ISForFaces+n_ISForEdges;
1225   total_counts *= max_constraints;
1226   n_vertices = 0;
1227   if (ISForVertices) {
1228     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1229   }
1230   total_counts += n_vertices;
1231   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1232   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1233   total_counts = 0;
1234   max_size_of_constraint = 0;
1235   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1236     if (i<n_ISForEdges) {
1237       used_IS = &ISForEdges[i];
1238     } else {
1239       used_IS = &ISForFaces[i-n_ISForEdges];
1240     }
1241     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1242     total_counts += j;
1243     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1244   }
1245   total_counts *= max_constraints;
1246   total_counts += n_vertices;
1247   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1248   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1249   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1250   /* local to boundary numbering */
1251   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1252   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1253   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1254   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1255   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1256   /* get local part of global near null space vectors */
1257   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1258   for (k=0;k<nnsp_size;k++) {
1259     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1260     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1261     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1262   }
1263 
1264   /* whether or not to skip lapack calls */
1265   skip_lapack = PETSC_TRUE;
1266   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1267 
1268   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1269   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1270     PetscScalar temp_work;
1271 #if defined(PETSC_MISSING_LAPACK_GESVD)
1272     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1273     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1274     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1275     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1276 #if defined(PETSC_USE_COMPLEX)
1277     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1278 #endif
1279     /* now we evaluate the optimal workspace using query with lwork=-1 */
1280     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1281     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1282     lwork = -1;
1283     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1284 #if !defined(PETSC_USE_COMPLEX)
1285     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1286 #else
1287     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1288 #endif
1289     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1290     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1291 #else /* on missing GESVD */
1292     /* SVD */
1293     PetscInt max_n,min_n;
1294     max_n = max_size_of_constraint;
1295     min_n = max_constraints;
1296     if (max_size_of_constraint < max_constraints) {
1297       min_n = max_size_of_constraint;
1298       max_n = max_constraints;
1299     }
1300     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1301 #if defined(PETSC_USE_COMPLEX)
1302     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1303 #endif
1304     /* now we evaluate the optimal workspace using query with lwork=-1 */
1305     lwork = -1;
1306     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1307     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1308     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1309     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1310 #if !defined(PETSC_USE_COMPLEX)
1311     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1312 #else
1313     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1314 #endif
1315     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1316     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1317 #endif /* on missing GESVD */
1318     /* Allocate optimal workspace */
1319     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1320     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1321   }
1322   /* Now we can loop on constraining sets */
1323   total_counts = 0;
1324   temp_indices[0] = 0;
1325   /* vertices */
1326   if (ISForVertices) {
1327     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1328     if (nnsp_has_cnst) { /* consider all vertices */
1329       for (i=0;i<n_vertices;i++) {
1330         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1331         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1332         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1333         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1334         total_counts++;
1335       }
1336     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1337       PetscBool used_vertex;
1338       for (i=0;i<n_vertices;i++) {
1339         used_vertex = PETSC_FALSE;
1340         k = 0;
1341         while (!used_vertex && k<nnsp_size) {
1342           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1343           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1344             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1345             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1346             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1347             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1348             total_counts++;
1349             used_vertex = PETSC_TRUE;
1350           }
1351           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1352           k++;
1353         }
1354       }
1355     }
1356     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1357     n_vertices = total_counts;
1358   }
1359 
1360   /* edges and faces */
1361   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1362     if (i<n_ISForEdges) {
1363       used_IS = &ISForEdges[i];
1364       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1365     } else {
1366       used_IS = &ISForFaces[i-n_ISForEdges];
1367       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1368     }
1369     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1370     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1371     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1372     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1373     /* change of basis should not be performed on local periodic nodes */
1374     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1375     if (nnsp_has_cnst) {
1376       PetscScalar quad_value;
1377       temp_constraints++;
1378       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1379       for (j=0;j<size_of_constraint;j++) {
1380         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1381         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1382         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1383       }
1384       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1385       if (boolforchange) {
1386         ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1387       }
1388       total_counts++;
1389     }
1390     for (k=0;k<nnsp_size;k++) {
1391       PetscReal real_value;
1392       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1393       for (j=0;j<size_of_constraint;j++) {
1394         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1395         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1396         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1397       }
1398       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1399       /* check if array is null on the connected component */
1400       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1401       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1402       if (real_value > 0.0) { /* keep indices and values */
1403         temp_constraints++;
1404         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1405         if (boolforchange) {
1406           ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1407         }
1408         total_counts++;
1409       }
1410     }
1411     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1412     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */
1413     if (!pcbddc->use_nnsp_true) {
1414       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1415 
1416 #if defined(PETSC_MISSING_LAPACK_GESVD)
1417       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1418          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1419          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1420             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1421             from that computed using LAPACKgesvd
1422          -> This is due to a different computation of eigenvectors in LAPACKheev
1423          -> The quality of the POD-computed basis will be the same */
1424       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1425       /* Store upper triangular part of correlation matrix */
1426       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1427       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1428       for (j=0;j<temp_constraints;j++) {
1429         for (k=0;k<j+1;k++) {
1430           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1431         }
1432       }
1433       /* compute eigenvalues and eigenvectors of correlation matrix */
1434       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1435       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1436 #if !defined(PETSC_USE_COMPLEX)
1437       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1438 #else
1439       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1440 #endif
1441       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1442       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1443       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1444       j=0;
1445       while (j < temp_constraints && singular_vals[j] < tol) j++;
1446       total_counts=total_counts-j;
1447       /* scale and copy POD basis into used quadrature memory */
1448       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1449       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1450       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1451       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1452       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1453       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1454       if (j<temp_constraints) {
1455         PetscInt ii;
1456         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1457         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1458         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1459         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1460         for (k=0;k<temp_constraints-j;k++) {
1461           for (ii=0;ii<size_of_constraint;ii++) {
1462             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1463           }
1464         }
1465       }
1466 #else  /* on missing GESVD */
1467       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1468       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1469       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1470       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1471 #if !defined(PETSC_USE_COMPLEX)
1472       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1473 #else
1474       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1475 #endif
1476       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1477       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1478       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1479       k = temp_constraints;
1480       if (k > size_of_constraint) k = size_of_constraint;
1481       j = 0;
1482       while (j < k && singular_vals[k-j-1] < tol) j++;
1483       total_counts = total_counts-temp_constraints+k-j;
1484 #endif /* on missing GESVD */
1485     }
1486   }
1487   /* free index sets of faces, edges and vertices */
1488   for (i=0;i<n_ISForFaces;i++) {
1489     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1490   }
1491   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1492   for (i=0;i<n_ISForEdges;i++) {
1493     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1494   }
1495   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1496   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1497 
1498   /* free workspace */
1499   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1500     ierr = PetscFree(work);CHKERRQ(ierr);
1501 #if defined(PETSC_USE_COMPLEX)
1502     ierr = PetscFree(rwork);CHKERRQ(ierr);
1503 #endif
1504     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1505 #if defined(PETSC_MISSING_LAPACK_GESVD)
1506     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1507     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1508 #endif
1509   }
1510   for (k=0;k<nnsp_size;k++) {
1511     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1512   }
1513   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1514 
1515   /* set quantities in pcbddc data structure */
1516   /* n_vertices defines the number of subdomain corners in the primal space */
1517   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1518   pcbddc->local_primal_size = total_counts;
1519   pcbddc->n_vertices = n_vertices;
1520   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1521 
1522   /* Create constraint matrix */
1523   /* The constraint matrix is used to compute the l2g map of primal dofs */
1524   /* so we need to set it up properly either with or without change of basis */
1525   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1526   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1527   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1528   /* array to compute a local numbering of constraints : vertices first then constraints */
1529   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1530   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1531   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1532   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1533   /* auxiliary stuff for basis change */
1534   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1535   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1536 
1537   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1538   total_primal_vertices=0;
1539   for (i=0;i<pcbddc->local_primal_size;i++) {
1540     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1541     if (size_of_constraint == 1) {
1542       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1543       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1544       aux_primal_minloc[total_primal_vertices]=0;
1545       total_primal_vertices++;
1546     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1547       PetscInt min_loc,min_index;
1548       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1549       /* find first untouched local node */
1550       k = 0;
1551       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1552       min_index = global_indices[k];
1553       min_loc = k;
1554       /* search the minimum among global nodes already untouched on the cc */
1555       for (k=1;k<size_of_constraint;k++) {
1556         /* there can be more than one constraint on a single connected component */
1557         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1558           min_index = global_indices[k];
1559           min_loc = k;
1560         }
1561       }
1562       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1563       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1564       aux_primal_minloc[total_primal_vertices]=min_loc;
1565       total_primal_vertices++;
1566     }
1567   }
1568   /* determine if a QR strategy is needed for change of basis */
1569   qr_needed = PETSC_FALSE;
1570   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1571   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1572     if (PetscBTLookup(change_basis,i)) {
1573       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1574       j = 0;
1575       for (k=0;k<size_of_constraint;k++) {
1576         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1577           j++;
1578         }
1579       }
1580       /* found more than one primal dof on the cc */
1581       if (j > 1) {
1582         PetscBTSet(qr_needed_idx,i);
1583         qr_needed = PETSC_TRUE;
1584       }
1585     }
1586   }
1587   /* free workspace */
1588   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1589   /* permute indices in order to have a sorted set of vertices */
1590   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1591 
1592   /* nonzero structure of constraint matrix */
1593   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1594   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1595   j=total_primal_vertices;
1596   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1597     if (!PetscBTLookup(change_basis,i)) {
1598       nnz[j]=temp_indices[i+1]-temp_indices[i];
1599       j++;
1600     }
1601   }
1602   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1603   ierr = PetscFree(nnz);CHKERRQ(ierr);
1604   /* set values in constraint matrix */
1605   for (i=0;i<total_primal_vertices;i++) {
1606     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1607   }
1608   total_counts = total_primal_vertices;
1609   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1610     if (!PetscBTLookup(change_basis,i)) {
1611       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1612       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1613       total_counts++;
1614     }
1615   }
1616   /* assembling */
1617   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1618   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1619   /*
1620   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1621   */
1622   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1623   if (pcbddc->use_change_of_basis) {
1624     /* dual and primal dofs on a single cc */
1625     PetscInt     dual_dofs,primal_dofs;
1626     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1627     PetscInt     primal_counter;
1628     /* working stuff for GEQRF */
1629     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1630     PetscBLASInt lqr_work;
1631     /* working stuff for UNGQR */
1632     PetscScalar  *gqr_work,lgqr_work_t;
1633     PetscBLASInt lgqr_work;
1634     /* working stuff for TRTRS */
1635     PetscScalar  *trs_rhs;
1636     PetscBLASInt Blas_NRHS;
1637     /* pointers for values insertion into change of basis matrix */
1638     PetscInt     *start_rows,*start_cols;
1639     PetscScalar  *start_vals;
1640     /* working stuff for values insertion */
1641     PetscBT      is_primal;
1642 
1643     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1644     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1645     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1646     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1647     /* work arrays */
1648     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1649     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1650     /* nonzeros per row */
1651     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1652       if (PetscBTLookup(change_basis,i)) {
1653         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1654         if (PetscBTLookup(qr_needed_idx,i)) {
1655           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1656         } else {
1657           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1658           /* get local primal index on the cc */
1659           j = 0;
1660           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1661           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1662         }
1663       }
1664     }
1665     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1666     ierr = PetscFree(nnz);CHKERRQ(ierr);
1667     /* Set initial identity in the matrix */
1668     for (i=0;i<pcis->n_B;i++) {
1669       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1670     }
1671 
1672     if (pcbddc->dbg_flag) {
1673       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1674       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1675     }
1676 
1677 
1678     /* Now we loop on the constraints which need a change of basis */
1679     /*
1680        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1681        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1682 
1683        Basic blocks of change of basis matrix T computed
1684 
1685           - Using the following block transformation if there is only a primal dof on the cc
1686             (in the example, primal dof is the last one of the edge in LOCAL ordering
1687              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1688             | 1        0   ...        0     1 |
1689             | 0        1   ...        0     1 |
1690             |              ...                |
1691             | 0        ...            1     1 |
1692             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1693 
1694           - via QR decomposition of constraints otherwise
1695     */
1696     if (qr_needed) {
1697       /* space to store Q */
1698       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1699       /* first we issue queries for optimal work */
1700       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1701       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1702       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1703       lqr_work = -1;
1704       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1705       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1706       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1707       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1708       lgqr_work = -1;
1709       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1710       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1711       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1712       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1713       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1714       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1715       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1716       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1717       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1718       /* array to store scaling factors for reflectors */
1719       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1720       /* array to store rhs and solution of triangular solver */
1721       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1722       /* allocating workspace for check */
1723       if (pcbddc->dbg_flag) {
1724         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1725       }
1726     }
1727     /* array to store whether a node is primal or not */
1728     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1729     for (i=0;i<total_primal_vertices;i++) {
1730       ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1731     }
1732 
1733     /* loop on constraints and see whether or not they need a change of basis and compute it */
1734     /* -> using implicit ordering contained in temp_indices data */
1735     total_counts = pcbddc->n_vertices;
1736     primal_counter = total_counts;
1737     while (total_counts<pcbddc->local_primal_size) {
1738       primal_dofs = 1;
1739       if (PetscBTLookup(change_basis,total_counts)) {
1740         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1741         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1742           primal_dofs++;
1743         }
1744         /* get constraint info */
1745         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1746         dual_dofs = size_of_constraint-primal_dofs;
1747 
1748         if (pcbddc->dbg_flag) {
1749           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1750         }
1751 
1752         if (primal_dofs > 1) { /* QR */
1753 
1754           /* copy quadrature constraints for change of basis check */
1755           if (pcbddc->dbg_flag) {
1756             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1757           }
1758           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1759           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1760 
1761           /* compute QR decomposition of constraints */
1762           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1763           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1764           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1765           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1766           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1767           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1768           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1769 
1770           /* explictly compute R^-T */
1771           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1772           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1773           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1774           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1775           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1776           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1777           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1778           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1779           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1780           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1781 
1782           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1783           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1784           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1785           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1786           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1787           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1788           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1789           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1790           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1791 
1792           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1793              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1794              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1795           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1796           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1797           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1798           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1799           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1800           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1801           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1802           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1803           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1804           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1805 
1806           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1807           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1808           /* insert cols for primal dofs */
1809           for (j=0;j<primal_dofs;j++) {
1810             start_vals = &qr_basis[j*size_of_constraint];
1811             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1812             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1813           }
1814           /* insert cols for dual dofs */
1815           for (j=0,k=0;j<dual_dofs;k++) {
1816             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1817               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1818               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1819               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1820               j++;
1821             }
1822           }
1823 
1824           /* check change of basis */
1825           if (pcbddc->dbg_flag) {
1826             PetscInt   ii,jj;
1827             PetscBool valid_qr=PETSC_TRUE;
1828             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1829             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1830             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1831             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1832             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1833             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1834             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1835             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1836             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1837             for (jj=0;jj<size_of_constraint;jj++) {
1838               for (ii=0;ii<primal_dofs;ii++) {
1839                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1840                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1841               }
1842             }
1843             if (!valid_qr) {
1844               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1845               for (jj=0;jj<size_of_constraint;jj++) {
1846                 for (ii=0;ii<primal_dofs;ii++) {
1847                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1848                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1849                   }
1850                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1851                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1852                   }
1853                 }
1854               }
1855             } else {
1856               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1857             }
1858           }
1859         } else { /* simple transformation block */
1860           PetscInt row,col;
1861           PetscScalar val;
1862           for (j=0;j<size_of_constraint;j++) {
1863             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
1864             if (!PetscBTLookup(is_primal,row)) {
1865               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1866               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
1867               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
1868             } else {
1869               for (k=0;k<size_of_constraint;k++) {
1870                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1871                 if (row != col) {
1872                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1873                 } else {
1874                   val = 1.0;
1875                 }
1876                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
1877               }
1878             }
1879           }
1880         }
1881         /* increment primal counter */
1882         primal_counter += primal_dofs;
1883       } else {
1884         if (pcbddc->dbg_flag) {
1885           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1886         }
1887       }
1888       /* increment constraint counter total_counts */
1889       total_counts += primal_dofs;
1890     }
1891 
1892     /* free workspace */
1893     if (qr_needed) {
1894       if (pcbddc->dbg_flag) {
1895         ierr = PetscFree(work);CHKERRQ(ierr);
1896       }
1897       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1898       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1899       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1900       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1901       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1902     }
1903     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1904     /* assembling */
1905     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1906     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1907     /*
1908     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1909     */
1910   }
1911 
1912   /* flush dbg viewer */
1913   if (pcbddc->dbg_flag) {
1914     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1915   }
1916 
1917   /* free workspace */
1918   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1919   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
1920   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1921   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1922   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1923   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1924   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1925   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1926   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1927   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 #undef __FUNCT__
1932 #define __FUNCT__ "PCBDDCAnalyzeInterface"
1933 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
1934 {
1935   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
1936   PC_IS       *pcis = (PC_IS*)pc->data;
1937   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
1938   PetscInt    bs,ierr,i,vertex_size;
1939   PetscViewer viewer=pcbddc->dbg_viewer;
1940 
1941   PetscFunctionBegin;
1942   /* Init local Graph struct */
1943   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
1944 
1945   /* Check validity of the csr graph passed in by the user */
1946   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
1947     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
1948   }
1949   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
1950   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
1951     Mat mat_adj;
1952     const PetscInt *xadj,*adjncy;
1953     PetscBool flg_row=PETSC_TRUE;
1954 
1955     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
1956     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1957     if (!flg_row) {
1958       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
1959     }
1960     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
1961     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1962     if (!flg_row) {
1963       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
1964     }
1965     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
1966   }
1967 
1968   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
1969   vertex_size = 1;
1970   if (!pcbddc->n_ISForDofs) {
1971     IS *custom_ISForDofs;
1972 
1973     ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
1974     ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
1975     for (i=0;i<bs;i++) {
1976       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
1977     }
1978     ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
1979     /* remove my references to IS objects */
1980     for (i=0;i<bs;i++) {
1981       ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
1982     }
1983     ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
1984   } else { /* mat block size as vertex size (used for elasticity) */
1985     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
1986   }
1987 
1988   /* Setup of Graph */
1989   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
1990 
1991   /* Graph's connected components analysis */
1992   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
1993 
1994   /* print some info to stdout */
1995   if (pcbddc->dbg_flag) {
1996     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
1997   }
1998   PetscFunctionReturn(0);
1999 }
2000 
2001 #undef __FUNCT__
2002 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2003 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2004 {
2005   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2006   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2007   PetscErrorCode ierr;
2008 
2009   PetscFunctionBegin;
2010   n = 0;
2011   vertices = 0;
2012   if (pcbddc->ConstraintMatrix) {
2013     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2014     for (i=0;i<local_primal_size;i++) {
2015       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2016       if (size_of_constraint == 1) n++;
2017       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2018     }
2019     if (vertices_idx) {
2020       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2021       n = 0;
2022       for (i=0;i<local_primal_size;i++) {
2023         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2024         if (size_of_constraint == 1) {
2025           vertices[n++]=row_cmat_indices[0];
2026         }
2027         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2028       }
2029     }
2030   }
2031   *n_vertices = n;
2032   if (vertices_idx) *vertices_idx = vertices;
2033   PetscFunctionReturn(0);
2034 }
2035 
2036 #undef __FUNCT__
2037 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2038 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2039 {
2040   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2041   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2042   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2043   PetscBT        touched;
2044   PetscErrorCode ierr;
2045 
2046     /* This function assumes that the number of local constraints per connected component
2047        is not greater than the number of nodes defined for the connected component
2048        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2049   PetscFunctionBegin;
2050   n = 0;
2051   constraints_index = 0;
2052   if (pcbddc->ConstraintMatrix) {
2053     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2054     max_size_of_constraint = 0;
2055     for (i=0;i<local_primal_size;i++) {
2056       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2057       if (size_of_constraint > 1) {
2058         n++;
2059       }
2060       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2061       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2062     }
2063     if (constraints_idx) {
2064       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2065       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2066       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2067       n = 0;
2068       for (i=0;i<local_primal_size;i++) {
2069         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2070         if (size_of_constraint > 1) {
2071           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2072           /* find first untouched local node */
2073           j = 0;
2074           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2075           min_index = row_cmat_global_indices[j];
2076           min_loc = j;
2077           /* search the minimum among nodes not yet touched on the connected component
2078              since there can be more than one constraint on a single cc */
2079           for (j=1;j<size_of_constraint;j++) {
2080             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2081               min_index = row_cmat_global_indices[j];
2082               min_loc = j;
2083             }
2084           }
2085           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2086           constraints_index[n++] = row_cmat_indices[min_loc];
2087         }
2088         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2089       }
2090       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2091       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2092     }
2093   }
2094   *n_constraints = n;
2095   if (constraints_idx) *constraints_idx = constraints_index;
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 /* the next two functions has been adapted from pcis.c */
2100 #undef __FUNCT__
2101 #define __FUNCT__ "PCBDDCApplySchur"
2102 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2103 {
2104   PetscErrorCode ierr;
2105   PC_IS          *pcis = (PC_IS*)(pc->data);
2106 
2107   PetscFunctionBegin;
2108   if (!vec2_B) { vec2_B = v; }
2109   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2110   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2111   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2112   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2113   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2114   PetscFunctionReturn(0);
2115 }
2116 
2117 #undef __FUNCT__
2118 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2119 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2120 {
2121   PetscErrorCode ierr;
2122   PC_IS          *pcis = (PC_IS*)(pc->data);
2123 
2124   PetscFunctionBegin;
2125   if (!vec2_B) { vec2_B = v; }
2126   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2127   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2128   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2129   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2130   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 #undef __FUNCT__
2135 #define __FUNCT__ "PCBDDCSubsetNumbering"
2136 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2137 {
2138   Vec            local_vec,global_vec;
2139   IS             seqis,paris;
2140   VecScatter     scatter_ctx;
2141   PetscScalar    *array;
2142   PetscInt       *temp_global_dofs;
2143   PetscScalar    globalsum;
2144   PetscInt       i,j,s;
2145   PetscInt       nlocals,first_index,old_index,max_local;
2146   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2147   PetscMPIInt    *dof_sizes,*dof_displs;
2148   PetscBool      first_found;
2149   PetscErrorCode ierr;
2150 
2151   PetscFunctionBegin;
2152   /* mpi buffers */
2153   MPI_Comm_size(comm,&size_prec_comm);
2154   MPI_Comm_rank(comm,&rank_prec_comm);
2155   j = ( !rank_prec_comm ? size_prec_comm : 0);
2156   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2157   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2158   /* get maximum size of subset */
2159   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2160   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2161   max_local = 0;
2162   if (n_local_dofs) {
2163     max_local = temp_global_dofs[0];
2164     for (i=1;i<n_local_dofs;i++) {
2165       if (max_local < temp_global_dofs[i] ) {
2166         max_local = temp_global_dofs[i];
2167       }
2168     }
2169   }
2170   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2171   max_global++;
2172   max_local = 0;
2173   if (n_local_dofs) {
2174     max_local = local_dofs[0];
2175     for (i=1;i<n_local_dofs;i++) {
2176       if (max_local < local_dofs[i] ) {
2177         max_local = local_dofs[i];
2178       }
2179     }
2180   }
2181   max_local++;
2182   /* allocate workspace */
2183   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2184   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2185   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2186   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2187   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2188   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2189   /* create scatter */
2190   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2191   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2192   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2193   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2194   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2195   /* init array */
2196   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2197   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2198   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2199   if (local_dofs_mult) {
2200     for (i=0;i<n_local_dofs;i++) {
2201       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2202     }
2203   } else {
2204     for (i=0;i<n_local_dofs;i++) {
2205       array[local_dofs[i]]=1.0;
2206     }
2207   }
2208   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2209   /* scatter into global vec and get total number of global dofs */
2210   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2211   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2212   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2213   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2214   /* Fill global_vec with cumulative function for global numbering */
2215   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2216   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2217   nlocals = 0;
2218   first_index = -1;
2219   first_found = PETSC_FALSE;
2220   for (i=0;i<s;i++) {
2221     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2222       first_found = PETSC_TRUE;
2223       first_index = i;
2224     }
2225     nlocals += (PetscInt)PetscRealPart(array[i]);
2226   }
2227   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2228   if (!rank_prec_comm) {
2229     dof_displs[0]=0;
2230     for (i=1;i<size_prec_comm;i++) {
2231       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2232     }
2233   }
2234   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2235   if (first_found) {
2236     array[first_index] += (PetscScalar)nlocals;
2237     old_index = first_index;
2238     for (i=first_index+1;i<s;i++) {
2239       if (PetscRealPart(array[i]) > 0.0) {
2240         array[i] += array[old_index];
2241         old_index = i;
2242       }
2243     }
2244   }
2245   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2246   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2247   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2248   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2249   /* get global ordering of local dofs */
2250   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2251   if (local_dofs_mult) {
2252     for (i=0;i<n_local_dofs;i++) {
2253       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2254     }
2255   } else {
2256     for (i=0;i<n_local_dofs;i++) {
2257       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2258     }
2259   }
2260   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2261   /* free workspace */
2262   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2263   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2264   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2265   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2266   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2267   /* return pointer to global ordering of local dofs */
2268   *global_numbering_subset = temp_global_dofs;
2269   PetscFunctionReturn(0);
2270 }
2271 
2272 #undef __FUNCT__
2273 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2274 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2275 {
2276   PetscInt       i,j;
2277   PetscScalar    *alphas;
2278   PetscErrorCode ierr;
2279 
2280   PetscFunctionBegin;
2281   /* this implements stabilized Gram-Schmidt */
2282   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2283   for (i=0;i<n;i++) {
2284     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2285     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2286     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2287   }
2288   ierr = PetscFree(alphas);CHKERRQ(ierr);
2289   PetscFunctionReturn(0);
2290 }
2291 
2292 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2293 #undef __FUNCT__
2294 #define __FUNCT__ "MatConvert_IS_AIJ"
2295 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2296 {
2297   Mat new_mat;
2298   Mat_IS *matis = (Mat_IS*)(mat->data);
2299   /* info on mat */
2300   PetscInt bs,rows,cols;
2301   PetscInt lrows,lcols;
2302   PetscInt local_rows,local_cols;
2303   PetscMPIInt nsubdomains;
2304   /* preallocation */
2305   Vec vec_dnz,vec_onz;
2306   PetscScalar *my_dnz,*my_onz,*array;
2307   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2308   PetscInt  index_row,index_col,owner;
2309   PetscInt  *local_indices,*global_indices;
2310   /* work */
2311   PetscInt i,j;
2312   PetscErrorCode ierr;
2313 
2314   PetscFunctionBegin;
2315   /* CHECKS */
2316   /* get info from mat */
2317   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2318   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2319   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2320   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2321 
2322   /* MAT_INITIAL_MATRIX */
2323   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2324   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2325   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2326   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2327   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2328   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2329 
2330   /*
2331     preallocation
2332   */
2333 
2334   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2335   /*
2336      Some vectors are needed to sum up properly on shared interface dofs.
2337      Note that preallocation is not exact, since it overestimates nonzeros
2338   */
2339 /*
2340   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2341   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2342   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2343   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2344 */
2345   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2346   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2347   /* All processes need to compute entire row ownership */
2348   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2349   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2350   for (i=0;i<nsubdomains;i++) {
2351     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2352       row_ownership[j]=i;
2353     }
2354   }
2355   /* map indices of local mat to global values */
2356   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2357   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2358   for (i=0;i<local_rows;i++) local_indices[i]=i;
2359   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2360   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2361 
2362   /*
2363      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2364      then, they will be summed up properly. This way, preallocation is always sufficient
2365   */
2366   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2367   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2368   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2369   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2370   for (i=0;i<local_rows;i++) {
2371     index_row = global_indices[i];
2372     for (j=i;j<local_rows;j++) {
2373       owner = row_ownership[index_row];
2374       index_col = global_indices[j];
2375       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2376         my_dnz[i] += 1.0;
2377       } else { /* offdiag block */
2378         my_onz[i] += 1.0;
2379       }
2380       /* same as before, interchanging rows and cols */
2381       if (i != j) {
2382         owner = row_ownership[index_col];
2383         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2384           my_dnz[j] += 1.0;
2385         } else {
2386           my_onz[j] += 1.0;
2387         }
2388       }
2389     }
2390   }
2391   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2392   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2393   if (local_rows) { /* multilevel guard */
2394     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2395     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2396   }
2397   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2398   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2399   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2400   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2401   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2402   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2403   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2404 
2405   /* set computed preallocation in dnz and onz */
2406   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2407   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2408   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2409   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2410   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2411   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2412   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2413   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2414 
2415   /* Resize preallocation if overestimated */
2416   for (i=0;i<lrows;i++) {
2417     dnz[i] = PetscMin(dnz[i],lcols);
2418     onz[i] = PetscMin(onz[i],cols-lcols);
2419   }
2420   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2421   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2422 
2423   /*
2424     Set values. Very Basic.
2425   */
2426   for (i=0;i<local_rows;i++) {
2427     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2428     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2429     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2430     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2431     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2432   }
2433   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2434   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2435   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2436 
2437   /* get back mat */
2438   *M = new_mat;
2439   PetscFunctionReturn(0);
2440 }
2441 
2442 #undef __FUNCT__
2443 #define __FUNCT__ "MatISSubassemble_Private"
2444 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2445 {
2446   Mat             subdomain_adj;
2447   IS              new_ranks,ranks_send_to;
2448   MatPartitioning partitioner;
2449   Mat_IS          *matis;
2450   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2451   PetscInt        prank;
2452   PetscMPIInt     size,rank,color;
2453   PetscInt        *xadj,*adjncy,*oldranks;
2454   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2455   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2456   PetscErrorCode  ierr;
2457   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2458   PetscSubcomm    subcomm;
2459 
2460   PetscFunctionBegin;
2461   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2462   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2463   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2464 
2465   /* Get info on mapping */
2466   matis = (Mat_IS*)(mat->data);
2467   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2468   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2469 
2470   /* build local CSR graph of subdomains' connectivity */
2471   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2472   xadj[0] = 0;
2473   xadj[1] = PetscMax(n_neighs-1,0);
2474   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2475   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2476 
2477   if (threshold) {
2478     PetscInt* count,min_threshold;
2479     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2480     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2481     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2482       for (j=0;j<n_shared[i];j++) {
2483         count[shared[i][j]] += 1;
2484       }
2485     }
2486     /* adapt threshold since we dont want to lose significant connections */
2487     min_threshold = n_neighs;
2488     for (i=1;i<n_neighs;i++) {
2489       for (j=0;j<n_shared[i];j++) {
2490         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2491       }
2492     }
2493     PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold);
2494     threshold = PetscMax(min_threshold+1,threshold);
2495     PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold);
2496     xadj[1] = 0;
2497     for (i=1;i<n_neighs;i++) {
2498       for (j=0;j<n_shared[i];j++) {
2499         if (count[shared[i][j]] < threshold) {
2500           adjncy[xadj[1]] = neighs[i];
2501           adjncy_wgt[xadj[1]] = n_shared[i];
2502           xadj[1]++;
2503           break;
2504         }
2505       }
2506     }
2507     ierr = PetscFree(count);CHKERRQ(ierr);
2508   } else {
2509     if (xadj[1]) {
2510       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2511       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2512     }
2513   }
2514   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2515   if (use_square) {
2516     for (i=0;i<xadj[1];i++) {
2517       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2518     }
2519   }
2520   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2521 
2522   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2523 
2524   /*
2525     Restrict work on active processes only.
2526   */
2527   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2528   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2529   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2530   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2531   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2532   if (color) {
2533     ierr = PetscFree(xadj);CHKERRQ(ierr);
2534     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2535     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2536   } else {
2537     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2538     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2539     prank = rank;
2540     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2541     for (i=0;i<size;i++) {
2542       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2543     }
2544     for (i=0;i<xadj[1];i++) {
2545       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2546     }
2547     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2548     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2549     n_subdomains = (PetscInt)size;
2550     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2551 
2552     /* Partition */
2553     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2554     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2555     if (use_vwgt) {
2556       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2557       v_wgt[0] = local_size;
2558       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2559     }
2560     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2561     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2562     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2563     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2564     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2565 
2566     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2567     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2568     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2569     /* clean up */
2570     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2571     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2572     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2573     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2574   }
2575   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2576 
2577   /* assemble parallel IS for sends */
2578   i = 1;
2579   if (color) i=0;
2580   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2581   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2582 
2583   /* get back IS */
2584   *is_sends = ranks_send_to;
2585   PetscFunctionReturn(0);
2586 }
2587 
2588 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2589 
2590 #undef __FUNCT__
2591 #define __FUNCT__ "MatISSubassemble"
2592 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2593 {
2594   Mat                    local_mat,new_mat;
2595   Mat_IS                 *matis;
2596   IS                     is_sends_internal;
2597   PetscInt               rows,cols;
2598   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2599   PetscBool              ismatis,isdense;
2600   ISLocalToGlobalMapping l2gmap;
2601   PetscInt*              l2gmap_indices;
2602   const PetscInt*        is_indices;
2603   MatType                new_local_type;
2604   MatTypePrivate         new_local_type_private;
2605   /* buffers */
2606   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2607   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2608   /* MPI */
2609   MPI_Comm               comm;
2610   PetscMPIInt            n_sends,n_recvs,commsize;
2611   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2612   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2613   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2614   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2615   PetscErrorCode         ierr;
2616 
2617   PetscFunctionBegin;
2618   /* checks */
2619   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2620   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2621   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2622   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2623   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2624   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2625   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2626   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2627   PetscValidLogicalCollectiveInt(mat,bs,0);
2628   /* prepare IS for sending if not provided */
2629   if (!is_sends) {
2630     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2631     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2632   } else {
2633     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2634     is_sends_internal = is_sends;
2635   }
2636 
2637   /* get pointer of MATIS data */
2638   matis = (Mat_IS*)mat->data;
2639 
2640   /* get comm */
2641   comm = PetscObjectComm((PetscObject)mat);
2642 
2643   /* compute number of sends */
2644   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2645   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2646 
2647   /* compute number of receives */
2648   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2649   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2650   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2651   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2652   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2653   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2654   ierr = PetscFree(iflags);CHKERRQ(ierr);
2655 
2656   /* prepare send/receive buffers */
2657   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2658   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2659   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2660   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2661 
2662   /* Get data from local mat */
2663   if (!isdense) {
2664     /* TODO: See below some guidelines on how to prepare the local buffers */
2665     /*
2666        send_buffer_vals should contain the raw values of the local matrix
2667        send_buffer_idxs should contain:
2668        - MatType_PRIVATE type
2669        - PetscInt        size_of_l2gmap
2670        - PetscInt        global_row_indices[size_of_l2gmap]
2671        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2672     */
2673   } else {
2674     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2675     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2676     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2677     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2678     send_buffer_idxs[1] = i;
2679     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2680     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2681     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2682     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2683     for (i=0;i<n_sends;i++) {
2684       ilengths_vals[is_indices[i]] = len*len;
2685       ilengths_idxs[is_indices[i]] = len+2;
2686     }
2687   }
2688   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2689   buf_size_idxs = 0;
2690   buf_size_vals = 0;
2691   for (i=0;i<n_recvs;i++) {
2692     buf_size_idxs += (PetscInt)olengths_idxs[i];
2693     buf_size_vals += (PetscInt)olengths_vals[i];
2694   }
2695   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2696   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2697 
2698   /* get new tags for clean communications */
2699   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2700   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2701 
2702   /* allocate for requests */
2703   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2704   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2705   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2706   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2707 
2708   /* communications */
2709   ptr_idxs = recv_buffer_idxs;
2710   ptr_vals = recv_buffer_vals;
2711   for (i=0;i<n_recvs;i++) {
2712     source_dest = onodes[i];
2713     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2714     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2715     ptr_idxs += olengths_idxs[i];
2716     ptr_vals += olengths_vals[i];
2717   }
2718   for (i=0;i<n_sends;i++) {
2719     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2720     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2721     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2722   }
2723   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2724   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2725 
2726   /* assemble new l2g map */
2727   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2728   ptr_idxs = recv_buffer_idxs;
2729   buf_size_idxs = 0;
2730   for (i=0;i<n_recvs;i++) {
2731     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2732     ptr_idxs += olengths_idxs[i];
2733   }
2734   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2735   ptr_idxs = recv_buffer_idxs;
2736   buf_size_idxs = 0;
2737   for (i=0;i<n_recvs;i++) {
2738     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2739     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2740     ptr_idxs += olengths_idxs[i];
2741   }
2742   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2743   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2744   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2745 
2746   /* infer new local matrix type from received local matrices type */
2747   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2748   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2749   new_local_type_private = MATAIJ_PRIVATE;
2750   new_local_type = MATSEQAIJ;
2751   if (n_recvs) {
2752     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
2753     ptr_idxs = recv_buffer_idxs;
2754     for (i=0;i<n_recvs;i++) {
2755       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2756         new_local_type_private = MATAIJ_PRIVATE;
2757         break;
2758       }
2759       ptr_idxs += olengths_idxs[i];
2760     }
2761     switch (new_local_type_private) {
2762       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2763         new_local_type = MATSEQAIJ;
2764         bs = 1;
2765         break;
2766       case MATAIJ_PRIVATE:
2767         new_local_type = MATSEQAIJ;
2768         bs = 1;
2769         break;
2770       case MATBAIJ_PRIVATE:
2771         new_local_type = MATSEQBAIJ;
2772         break;
2773       case MATSBAIJ_PRIVATE:
2774         new_local_type = MATSEQSBAIJ;
2775         break;
2776       default:
2777         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2778         break;
2779     }
2780   }
2781 
2782   /* create MATIS object */
2783   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2784   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2785   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2786   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2787   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2788   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2789 
2790   /* set values */
2791   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2792   ptr_vals = recv_buffer_vals;
2793   ptr_idxs = recv_buffer_idxs;
2794   for (i=0;i<n_recvs;i++) {
2795     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2796       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2797       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2798       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2799       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2800       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2801     }
2802     ptr_idxs += olengths_idxs[i];
2803     ptr_vals += olengths_vals[i];
2804   }
2805   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2806   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2807   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2808   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2809 
2810   { /* check */
2811     Vec       lvec,rvec;
2812     PetscReal infty_error;
2813 
2814     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
2815     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
2816     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
2817     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
2818     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
2819     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2820     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
2821     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
2822     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
2823   }
2824 
2825   /* free workspace */
2826   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
2827   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
2828   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2829   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
2830   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2831   if (isdense) {
2832     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2833     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2834   } else {
2835     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
2836   }
2837   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
2838   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
2839   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
2840   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
2841   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
2842   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
2843   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
2844   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
2845   ierr = PetscFree(onodes);CHKERRQ(ierr);
2846   /* get back new mat */
2847   *mat_n = new_mat;
2848   PetscFunctionReturn(0);
2849 }
2850 
2851 #undef __FUNCT__
2852 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2853 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2854 {
2855   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
2856   PC_IS                  *pcis = (PC_IS*)pc->data;
2857   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
2858   MatNullSpace           CoarseNullSpace=NULL;
2859   ISLocalToGlobalMapping coarse_islg;
2860   IS                     coarse_is;
2861   PetscInt               max_it,coarse_size,*local_primal_indices=NULL;
2862   PetscInt               im_active=-1,active_procs=-1;
2863   PC                     pc_temp;
2864   PCType                 coarse_pc_type;
2865   KSPType                coarse_ksp_type;
2866   PetscBool              multilevel_requested,multilevel_allowed;
2867   PetscBool              setsym,issym,isbddc,isnn;
2868   MatStructure           matstruct;
2869   PetscErrorCode         ierr;
2870 
2871   PetscFunctionBegin;
2872   /* Assign global numbering to coarse dofs */
2873   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2874 
2875   /* infer some info from user */
2876   issym = PETSC_FALSE;
2877   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2878   multilevel_allowed = PETSC_FALSE;
2879   multilevel_requested = PETSC_FALSE;
2880   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
2881   if (multilevel_requested) {
2882     /* count "active processes" */
2883     im_active = !!(pcis->n);
2884     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2885     if (active_procs/pcbddc->coarsening_ratio < 2) {
2886       multilevel_allowed = PETSC_FALSE;
2887     } else {
2888       multilevel_allowed = PETSC_TRUE;
2889     }
2890   }
2891 
2892   /* set defaults for coarse KSP and PC */
2893   if (multilevel_allowed) {
2894     if (issym) {
2895       coarse_ksp_type = KSPRICHARDSON;
2896     } else {
2897       coarse_ksp_type = KSPCHEBYSHEV;
2898     }
2899     coarse_pc_type = PCBDDC;
2900   } else {
2901     coarse_ksp_type = KSPPREONLY;
2902     coarse_pc_type = PCREDUNDANT;
2903   }
2904 
2905   /* create the coarse KSP object only once with defaults */
2906   if (!pcbddc->coarse_ksp) {
2907     char prefix[256],str_level[3];
2908     size_t len;
2909     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
2910     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
2911     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
2912     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2913     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2914     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2915     /* prefix */
2916     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
2917     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
2918     if (!pcbddc->current_level) {
2919       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2920       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
2921     } else {
2922       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
2923       if (pcbddc->current_level>1) len -= 2;
2924       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
2925       *(prefix+len)='\0';
2926       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
2927       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
2928     }
2929     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
2930   }
2931   /* allow user customization */
2932   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
2933   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
2934   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
2935 
2936   /* get some info after set from options */
2937   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2938   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
2939   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
2940   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
2941     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2942     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2943     isbddc = PETSC_FALSE;
2944   }
2945 
2946   /* propagate BDDC info to the next level */
2947   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
2948   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2949   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
2950 
2951   /* creates temporary MATIS object for coarse matrix */
2952   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
2953   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
2954   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
2955   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
2956   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
2957   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2958   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2959   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
2960   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
2961   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
2962 
2963   /* assemble coarse matrix */
2964   if (isbddc || isnn) {
2965     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
2966   } else {
2967     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
2968   }
2969   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
2970 
2971   /* create local to global scatters for coarse problem */
2972   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
2973   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
2974   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
2975 
2976   /* propagate symmetry info to coarse matrix */
2977   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
2978 
2979   /* Compute coarse null space (special handling by BDDC only) */
2980   if (pcbddc->NullSpace) {
2981     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
2982     if (isbddc) {
2983       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
2984     } else {
2985       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
2986     }
2987   }
2988 
2989   /* set operators */
2990   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
2991   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
2992 
2993   /* additional KSP customization */
2994   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
2995   if (max_it < 5) {
2996     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
2997   }
2998   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
2999 
3000 
3001   /* print some info if requested */
3002   if (pcbddc->dbg_flag) {
3003     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3004     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3005     if (!multilevel_allowed) {
3006       if (multilevel_requested) {
3007         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3008       } else if (pcbddc->max_levels) {
3009         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3010       }
3011     }
3012     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3013     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3014   }
3015 
3016   /* setup coarse ksp */
3017   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3018   if (pcbddc->dbg_flag) {
3019     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3020     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3021     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3022     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3023   }
3024 
3025   /* Check coarse problem if requested */
3026   if (pcbddc->dbg_flag) {
3027     KSP       check_ksp;
3028     KSPType   check_ksp_type;
3029     PC        check_pc;
3030     Vec       check_vec;
3031     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3032     PetscInt  its;
3033     PetscBool ispreonly,compute;
3034 
3035     /* Create ksp object suitable for estimation of extreme eigenvalues */
3036     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3037     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3038     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
3039     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3040     if (ispreonly) {
3041       check_ksp_type = KSPPREONLY;
3042       compute = PETSC_FALSE;
3043     } else {
3044       if (issym) check_ksp_type = KSPCG;
3045       else check_ksp_type = KSPGMRES;
3046       compute = PETSC_TRUE;
3047     }
3048     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3049     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3050     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3051     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3052     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3053     /* create random vec */
3054     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3055     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3056     if (CoarseNullSpace) {
3057       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3058     }
3059     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3060     /* solve coarse problem */
3061     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3062     if (CoarseNullSpace) {
3063       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3064     }
3065     /* check coarse problem residual error */
3066     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3067     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3068     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3069     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3070     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3071     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3072     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3073     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3074     /* get eigenvalue estimation if preonly has not been requested */
3075     if (!ispreonly) {
3076       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3077       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3078       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3079     }
3080     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3081     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3082   }
3083   /* free memory */
3084   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3085   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3086   PetscFunctionReturn(0);
3087 }
3088 
3089 #undef __FUNCT__
3090 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3091 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3092 {
3093   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3094   PC_IS*         pcis = (PC_IS*)pc->data;
3095   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3096   PetscInt       i,j,coarse_size;
3097   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3098   PetscErrorCode ierr;
3099 
3100   PetscFunctionBegin;
3101   /* get indices in local ordering for vertices and constraints */
3102   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3103   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3104   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3105   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3106   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3107   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3108   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3109 
3110   /* Compute global number of coarse dofs */
3111   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3112 
3113   /* check numbering */
3114   if (pcbddc->dbg_flag) {
3115     PetscScalar coarsesum,*array;
3116 
3117     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3118     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3119     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3120     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3121     for (i=0;i<pcbddc->local_primal_size;i++) {
3122       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3123     }
3124     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3125     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3126     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3127     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3128     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3129     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3130     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3131     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3132     for (i=0;i<pcis->n;i++) {
3133       if (array[i] == 1.0) {
3134         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3135       }
3136     }
3137     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3138     for (i=0;i<pcis->n;i++) {
3139       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3140     }
3141     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3142     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3143     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3144     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3145     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3146     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3147     if (pcbddc->dbg_flag > 1) {
3148       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3149       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3150       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3151       for (i=0;i<pcbddc->local_primal_size;i++) {
3152         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]);
3153       }
3154       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3155     }
3156     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3157   }
3158   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3159   /* get back data */
3160   *coarse_size_n = coarse_size;
3161   *local_primal_indices_n = local_primal_indices;
3162   PetscFunctionReturn(0);
3163 }
3164 
3165