xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision dcca6d9d80ebd869fe6029bd05a3aa9faafef49e) !
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCResetCustomization"
47 PetscErrorCode PCBDDCResetCustomization(PC pc)
48 {
49   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
50   PetscInt       i;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
56   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
59   for (i=0;i<pcbddc->n_ISForDofs;i++) {
60     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
61   }
62   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
63   pcbddc->n_ISForDofs = 0;
64   PetscFunctionReturn(0);
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "PCBDDCResetTopography"
69 PetscErrorCode PCBDDCResetTopography(PC pc)
70 {
71   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
72   PetscErrorCode ierr;
73 
74   PetscFunctionBegin;
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
101   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
102   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
106   PetscFunctionReturn(0);
107 }
108 
109 #undef __FUNCT__
110 #define __FUNCT__ "PCBDDCCreateWorkVectors"
111 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
112 {
113   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
114   PC_IS          *pcis = (PC_IS*)pc->data;
115   VecType        impVecType;
116   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
117   PetscErrorCode ierr;
118 
119   PetscFunctionBegin;
120   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
121   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
122   local_primal_size = n_constraints+n_vertices;
123   n_R = pcis->n-n_vertices;
124   /* local work vectors */
125   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
126   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
127   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
128   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
129   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
130   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
131   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
132   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
133   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
134   if (n_constraints) {
135     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
136     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
137     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
138   }
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "PCBDDCSetUpCorrection"
144 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
145 {
146   PetscErrorCode         ierr;
147   /* pointers to pcis and pcbddc */
148   PC_IS*                 pcis = (PC_IS*)pc->data;
149   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
150   /* submatrices of local problem */
151   Mat                    A_RV,A_VR,A_VV;
152   /* working matrices */
153   Mat                    M1,M2,M3,C_CR;
154   /* working vectors */
155   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
156   /* additional working stuff */
157   IS                     is_aux;
158   ISLocalToGlobalMapping BtoNmap;
159   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
160   const PetscScalar      *array,*row_cmat_values;
161   const PetscInt         *row_cmat_indices,*idx_R_local;
162   PetscInt               *vertices,*idx_V_B,*auxindices;
163   PetscInt               n_vertices,n_constraints,size_of_constraint;
164   PetscInt               i,j,n_R,n_D,n_B;
165   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
166   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
167   MatType                impMatType;
168   /* some shortcuts to scalars */
169   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
170   /* for debugging purposes */
171   PetscReal              *coarsefunctions_errors,*constraints_errors;
172 
173   PetscFunctionBegin;
174   /* get number of vertices and their local indices */
175   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
176   n_constraints = pcbddc->local_primal_size-n_vertices;
177   /* Set Non-overlapping dimensions */
178   n_B = pcis->n_B; n_D = pcis->n - n_B;
179   n_R = pcis->n-n_vertices;
180 
181   /* Set types for local objects needed by BDDC precondtioner */
182   impMatType = MATSEQDENSE;
183 
184   /* Allocating some extra storage just to be safe */
185   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
186   for (i=0;i<pcis->n;i++) auxindices[i]=i;
187 
188   /* vertices in boundary numbering */
189   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
190   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
191   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
192   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
193   if (i != n_vertices) {
194     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
195   }
196 
197   /* Precompute stuffs needed for preprocessing and application of BDDC*/
198   if (n_constraints) {
199     /* work vectors */
200     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
201     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
202     /* auxiliary matrices */
203     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
204     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
205     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
206     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
207 
208     /* Extract constraints on R nodes: C_{CR}  */
209     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
210     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
211     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
212 
213     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
214     for (i=0;i<n_constraints;i++) {
215       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
216       /* Get row of constraint matrix in R numbering */
217       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
218       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
219       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
220       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
221       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
222       /* Solve for row of constraint matrix in R numbering */
223       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
224       /* Set values in local_auxmat2 */
225       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
226       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
227       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
228     }
229     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
230     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
231     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
232 
233     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
234     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
235     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
236     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
237     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
238     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
239     ierr = MatSetUp(M1);CHKERRQ(ierr);
240     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
241     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
242     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
243     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
244     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
245     ierr = MatDestroy(&M2);CHKERRQ(ierr);
246     ierr = MatDestroy(&M3);CHKERRQ(ierr);
247     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
248     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
249   }
250 
251   /* Get submatrices from subdomain matrix */
252   if (n_vertices) {
253     PetscInt ibs,mbs;
254     PetscBool issbaij;
255     Mat newmat;
256 
257     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
258     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
259     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
260     if (ibs != mbs) { /* need to convert to SEQAIJ */
261       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
262       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
263       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
264       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
265       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
266     } else {
267       /* this is safe */
268       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
269       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
270       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
271         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
272         /* which of the two approaches is faster? */
273         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
274         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
275         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
276         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
277         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
278       } else {
279         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
280         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
281       }
282     }
283     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
284     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
285     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
286   }
287 
288   /* Matrix of coarse basis functions (local) */
289   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
290   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
291   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
292   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
293   if (pcbddc->switch_static || pcbddc->dbg_flag) {
294     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
295     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
296     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
297     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
298   }
299 
300   if (pcbddc->dbg_flag) {
301     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
302     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
303     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
304   }
305   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
306   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
307 
308   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
309 
310   /* vertices */
311   for (i=0;i<n_vertices;i++) {
312     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
313     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
314     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
315     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
316     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
317     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
318     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
319     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
320     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
321     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
322     if (n_constraints) {
323       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
324       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
325       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
326     }
327     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
328     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
329 
330     /* Set values in coarse basis function and subdomain part of coarse_mat */
331     /* coarse basis functions */
332     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
333     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
334     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
335     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
336     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
337     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
338     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
339     if (pcbddc->switch_static || pcbddc->dbg_flag) {
340       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
341       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
342       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
343       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
344       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
345     }
346     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
347     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
348     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
349     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
350     if (n_constraints) {
351       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
352       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
353       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
354     }
355 
356     /* check */
357     if (pcbddc->dbg_flag) {
358       /* assemble subdomain vector on local nodes */
359       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
360       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
361       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
362       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
363       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
364       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
365       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
366       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
367       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
368       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
369       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
370       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
371       if (n_constraints) {
372         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
373         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
374         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
375       }
376       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
377       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
378       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
379       /* check saddle point solution */
380       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
381       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
382       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
383       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
384       /* shift by the identity matrix */
385       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
386       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
387       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
388       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
389     }
390   }
391 
392   /* constraints */
393   for (i=0;i<n_constraints;i++) {
394     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
395     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
396     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
397     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
398     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
399     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
400     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
401     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
402     if (n_vertices) {
403       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
404     }
405     /* Set values in coarse basis function and subdomain part of coarse_mat */
406     /* coarse basis functions */
407     j = i+n_vertices; /* don't touch this! */
408     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
409     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
410     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
411     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
412     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
413     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
414     if (pcbddc->switch_static || pcbddc->dbg_flag) {
415       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
416       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
417       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
418       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
419       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
420     }
421     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
422     if (n_vertices) {
423       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
424       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
425       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
426     }
427     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
428     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
429     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
430 
431     if (pcbddc->dbg_flag) {
432       /* assemble subdomain vector on nodes */
433       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
434       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
435       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
436       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
437       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
438       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
439       /* assemble subdomain vector of lagrange multipliers */
440       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
441       if (n_vertices) {
442         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
443         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
444         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
445       }
446       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
447       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
448       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
449       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
450       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
451       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
452       /* check saddle point solution */
453       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
454       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
455       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
456       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
457       /* shift by the identity matrix */
458       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
459       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
460       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
461       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
462     }
463   }
464   /* call assembling routines for local coarse basis */
465   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
466   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
467   if (pcbddc->switch_static || pcbddc->dbg_flag) {
468     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
469     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
470   }
471 
472   /* compute other basis functions for non-symmetric problems */
473   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
474   if (!setsym || (setsym && !issym)) {
475     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
476     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
477     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
478     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
479     if (pcbddc->switch_static || pcbddc->dbg_flag) {
480       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
481       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
482       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
483       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
484     }
485     for (i=0;i<pcbddc->local_primal_size;i++) {
486       if (n_constraints) {
487         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
488         for (j=0;j<n_constraints;j++) {
489           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
490         }
491         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
492         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
493       }
494       if (i<n_vertices) {
495         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
496         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
497         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
498         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
499         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
500         if (n_constraints) {
501           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
502         }
503       } else {
504         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
505       }
506       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
507       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
508       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
509       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
510       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
511       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
512       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
513       if (i<n_vertices) {
514         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
515       }
516       if (pcbddc->switch_static || pcbddc->dbg_flag) {
517         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
518         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
519         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
520         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
521         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
522       }
523 
524       if (pcbddc->dbg_flag) {
525         /* assemble subdomain vector on nodes */
526         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
527         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
528         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
529         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
530         if (i<n_vertices) {
531           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
532         }
533         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
534         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
535         /* assemble subdomain vector of lagrange multipliers */
536         for (j=0;j<pcbddc->local_primal_size;j++) {
537           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
538         }
539         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
540         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
541         /* check saddle point solution */
542         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
543         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
544         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
545         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
546         /* shift by the identity matrix */
547         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
548         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
549         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
550         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
551       }
552     }
553     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
554     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
555     if (pcbddc->switch_static || pcbddc->dbg_flag) {
556       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
557       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
558     }
559   }
560   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
561   /* Checking coarse_sub_mat and coarse basis functios */
562   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
563   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
564   if (pcbddc->dbg_flag) {
565     Mat         coarse_sub_mat;
566     Mat         AUXMAT,TM1,TM2,TM3,TM4;
567     Mat         coarse_phi_D,coarse_phi_B;
568     Mat         coarse_psi_D,coarse_psi_B;
569     Mat         A_II,A_BB,A_IB,A_BI;
570     MatType     checkmattype=MATSEQAIJ;
571     PetscReal   real_value;
572 
573     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
574     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
575     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
576     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
577     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
578     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
579     if (pcbddc->coarse_psi_B) {
580       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
581       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
582     }
583     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
584 
585     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
586     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
587     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
588     if (pcbddc->coarse_psi_B) {
589       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
590       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
591       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
592       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
593       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
594       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
595       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
596       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
597       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
598       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
599       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
600       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
601     } else {
602       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
603       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
604       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
605       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
606       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
607       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
608       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
609       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
610     }
611     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
612     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
613     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
614     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
615     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
616     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
617     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
618     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
619     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
620     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
621     for (i=0;i<pcbddc->local_primal_size;i++) {
622       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
623     }
624     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
625     for (i=0;i<pcbddc->local_primal_size;i++) {
626       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
627     }
628     if (pcbddc->coarse_psi_B) {
629       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
630       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
631         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
632       }
633       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
634       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
635         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
636       }
637     }
638     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
639     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
640     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
641     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
642     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
643     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
644     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
645     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
646     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
647     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
648     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
649     if (pcbddc->coarse_psi_B) {
650       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
651       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
652     }
653     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
654     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
655     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
656     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
657   }
658   /* free memory */
659   if (n_vertices) {
660     ierr = PetscFree(vertices);CHKERRQ(ierr);
661     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
662     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
663     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
664     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
665     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
666   }
667   if (n_constraints) {
668     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
669     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
670     ierr = MatDestroy(&M1);CHKERRQ(ierr);
671     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
672   }
673   ierr = PetscFree(auxindices);CHKERRQ(ierr);
674   /* get back data */
675   *coarse_submat_vals_n = coarse_submat_vals;
676   PetscFunctionReturn(0);
677 }
678 
679 #undef __FUNCT__
680 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
681 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
682 {
683   PC_IS*            pcis = (PC_IS*)(pc->data);
684   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
685   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
686   PetscBool         issbaij,isbaij;
687   /* manage repeated solves */
688   MatReuse          reuse;
689   MatStructure      matstruct;
690   PetscErrorCode    ierr;
691 
692   PetscFunctionBegin;
693   /* get mat flags */
694   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
695   reuse = MAT_INITIAL_MATRIX;
696   if (pc->setupcalled) {
697     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
698     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
699     if (matstruct == SAME_NONZERO_PATTERN) {
700       reuse = MAT_REUSE_MATRIX;
701     } else {
702       reuse = MAT_INITIAL_MATRIX;
703     }
704   }
705   if (reuse == MAT_INITIAL_MATRIX) {
706     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
707     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
708     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
709     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
710     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
711   }
712 
713   /* transform local matrices if needed */
714   if (pcbddc->use_change_of_basis) {
715     Mat         change_mat_all;
716     PetscScalar *row_cmat_values;
717     PetscInt    *row_cmat_indices;
718     PetscInt    *nnz,*is_indices,*temp_indices;
719     PetscInt    i,j,k,n_D,n_B;
720 
721     /* Get Non-overlapping dimensions */
722     n_B = pcis->n_B;
723     n_D = pcis->n-n_B;
724 
725     /* compute nonzero structure of change of basis on all local nodes */
726     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
727     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
728     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
729     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
730     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
731     k=1;
732     for (i=0;i<n_B;i++) {
733       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
734       nnz[is_indices[i]]=j;
735       if (k < j) k = j;
736       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
737     }
738     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
739     /* assemble change of basis matrix on the whole set of local dofs */
740     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
741     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
742     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
743     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
744     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
745     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
746     for (i=0;i<n_D;i++) {
747       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
748     }
749     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
750     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
751     for (i=0;i<n_B;i++) {
752       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
753       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
754       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
755       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
756     }
757     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
758     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
759     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
760     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
761     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
762     if (!issbaij && !isbaij) {
763       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
764     } else {
765       Mat work_mat;
766       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
767       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
768       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
769     }
770     /*
771     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
772     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
773     */
774     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
775     ierr = PetscFree(nnz);CHKERRQ(ierr);
776     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
777   } else {
778     /* without change of basis, the local matrix is unchanged */
779     if (!pcbddc->local_mat) {
780       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
781       pcbddc->local_mat = matis->A;
782     }
783   }
784 
785   /* get submatrices */
786   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
787   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
788   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
789   if (!issbaij) {
790     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
791     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
792   } else {
793     Mat newmat;
794     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
795     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
796     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
797     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
798   }
799   PetscFunctionReturn(0);
800 }
801 
802 #undef __FUNCT__
803 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
804 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
805 {
806   PC_IS*         pcis = (PC_IS*)(pc->data);
807   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
808   IS             is_aux1,is_aux2;
809   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
810   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
811   PetscInt       vbs,bs;
812   PetscBT        bitmask;
813   PetscErrorCode ierr;
814 
815   PetscFunctionBegin;
816   /* Set Non-overlapping dimensions */
817   n_B = pcis->n_B; n_D = pcis->n - n_B;
818   /* get vertex indices from constraint matrix */
819   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
820 
821   /* create auxiliary bitmask */
822   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
823   for (i=0;i<n_vertices;i++) {
824     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
825   }
826 
827   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
828   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
829   for (i=0, n_R=0; i<pcis->n; i++) {
830     if (!PetscBTLookup(bitmask,i)) {
831       idx_R_local[n_R] = i;
832       n_R++;
833     }
834   }
835 
836   /* Block code */
837   vbs = 1;
838   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
839   if (bs>1 && !(n_vertices%bs)) {
840     PetscBool is_blocked = PETSC_TRUE;
841     PetscInt  *vary;
842     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
843     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
844     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
845     for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++;
846     for (i=0; i<n_vertices; i++) {
847       if (vary[i]!=0 && vary[i]!=bs) {
848         is_blocked = PETSC_FALSE;
849         break;
850       }
851     }
852     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
853       vbs = bs;
854       for (i=0;i<n_R/vbs;i++) {
855         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
856       }
857     }
858     ierr = PetscFree(vary);CHKERRQ(ierr);
859   }
860   ierr = PetscFree(vertices);CHKERRQ(ierr);
861   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
862   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
863 
864   /* print some info if requested */
865   if (pcbddc->dbg_flag) {
866     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
867     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
868     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
869     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
870     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
871     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
872     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
873   }
874 
875   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
876   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
877   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
878   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
879   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
880   for (i=0; i<n_D; i++) {
881     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
882   }
883   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
884   for (i=0, j=0; i<n_R; i++) {
885     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
886       aux_array1[j++] = i;
887     }
888   }
889   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
890   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
891   for (i=0, j=0; i<n_B; i++) {
892     if (!PetscBTLookup(bitmask,is_indices[i])) {
893       aux_array2[j++] = i;
894     }
895   }
896   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
897   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
898   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
899   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
900   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
901 
902   if (pcbddc->switch_static || pcbddc->dbg_flag) {
903     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
904     for (i=0, j=0; i<n_R; i++) {
905       if (PetscBTLookup(bitmask,idx_R_local[i])) {
906         aux_array1[j++] = i;
907       }
908     }
909     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
910     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
911     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
912   }
913   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
914   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
915   PetscFunctionReturn(0);
916 }
917 
918 
919 #undef __FUNCT__
920 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
921 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
922 {
923   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
924   PC_IS          *pcis = (PC_IS*)pc->data;
925   PC             pc_temp;
926   Mat            A_RR;
927   Vec            vec1,vec2,vec3;
928   MatStructure   matstruct;
929   PetscScalar    m_one = -1.0;
930   PetscReal      value;
931   PetscInt       n_D,n_R,ibs,mbs;
932   PetscBool      use_exact,use_exact_reduced,issbaij;
933   PetscErrorCode ierr;
934   /* prefixes stuff */
935   char           dir_prefix[256],neu_prefix[256],str_level[3];
936   size_t         len;
937 
938   PetscFunctionBegin;
939   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
940 
941   /* compute prefixes */
942   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
943   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
944   if (!pcbddc->current_level) {
945     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
946     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
947     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
948     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
949   } else {
950     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
951     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
952     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
953     len -= 15; /* remove "pc_bddc_coarse_" */
954     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
955     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
956     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
957     *(dir_prefix+len)='\0';
958     *(neu_prefix+len)='\0';
959     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
960     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
961     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
962     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
963   }
964 
965   /* DIRICHLET PROBLEM */
966   /* Matrix for Dirichlet problem is pcis->A_II */
967   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
968   if (!pcbddc->ksp_D) { /* create object if not yet build */
969     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
970     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
971     /* default */
972     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
973     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
974     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
975     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
976     if (issbaij) {
977       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
978     } else {
979       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
980     }
981     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
982   }
983   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
984   /* Allow user's customization */
985   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
986   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
987   if (!n_D) {
988     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
989     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
990   }
991   /* Set Up KSP for Dirichlet problem of BDDC */
992   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
993   /* set ksp_D into pcis data */
994   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
995   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
996   pcis->ksp_D = pcbddc->ksp_D;
997 
998   /* NEUMANN PROBLEM */
999   /* Matrix for Neumann problem is A_RR -> we need to create it */
1000   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1001   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1002   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1003   if (ibs != mbs) {
1004     Mat newmat;
1005     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1006     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1007     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1008   } else {
1009     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
1010   }
1011   if (!pcbddc->ksp_R) { /* create object if not yet build */
1012     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1013     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1014     /* default */
1015     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1016     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1017     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1018     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1019     if (issbaij) {
1020       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1021     } else {
1022       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1023     }
1024     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1025   }
1026   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1027   /* Allow user's customization */
1028   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1029   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1030   if (!n_R) {
1031     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1032     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1033   }
1034   /* Set Up KSP for Neumann problem of BDDC */
1035   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1036 
1037   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1038 
1039   /* Dirichlet */
1040   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
1041   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1042   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1043   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
1044   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
1045   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1046   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1047   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1048   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1049   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1050   /* need to be adapted? */
1051   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1052   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1053   ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1054   /* print info */
1055   if (pcbddc->dbg_flag) {
1056     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1057     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1058     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1059     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1060     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1061   }
1062   if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1063     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1064   }
1065 
1066   /* Neumann */
1067   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
1068   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
1069   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
1070   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
1071   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
1072   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
1073   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
1074   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
1075   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
1076   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
1077   /* need to be adapted? */
1078   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1079   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1080   /* print info */
1081   if (pcbddc->dbg_flag) {
1082     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1083     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1084   }
1085   if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1086     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1087   }
1088   /* free Neumann problem's matrix */
1089   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 #undef __FUNCT__
1094 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1095 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1096 {
1097   PetscErrorCode ierr;
1098   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1099 
1100   PetscFunctionBegin;
1101   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1102   if (pcbddc->local_auxmat1) {
1103     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1104     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1105   }
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1111 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1112 {
1113   PetscErrorCode ierr;
1114   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1115   PC_IS*            pcis = (PC_IS*)  (pc->data);
1116   const PetscScalar zero = 0.0;
1117 
1118   PetscFunctionBegin;
1119   /* Application of PHI^T (or PSI^T)  */
1120   if (pcbddc->coarse_psi_B) {
1121     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1122     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1123   } else {
1124     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1125     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1126   }
1127   /* Scatter data of coarse_rhs */
1128   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1129   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1130 
1131   /* Local solution on R nodes */
1132   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1133   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1134   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1135   if (pcbddc->switch_static) {
1136     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1137     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138   }
1139   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1140   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1141   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1142   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1143   if (pcbddc->switch_static) {
1144     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1145     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1146   }
1147 
1148   /* Coarse solution */
1149   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1150   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1151     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1152   }
1153   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1154   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1155 
1156   /* Sum contributions from two levels */
1157   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1158   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 #undef __FUNCT__
1163 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1164 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1165 {
1166   PetscErrorCode ierr;
1167   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1168 
1169   PetscFunctionBegin;
1170   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNCT__
1175 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1176 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1177 {
1178   PetscErrorCode ierr;
1179   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1180 
1181   PetscFunctionBegin;
1182   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 /* uncomment for testing purposes */
1187 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1188 #undef __FUNCT__
1189 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1190 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1191 {
1192   PetscErrorCode    ierr;
1193   PC_IS*            pcis = (PC_IS*)(pc->data);
1194   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1195   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1196   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1197   MatType           impMatType=MATSEQAIJ;
1198   /* one and zero */
1199   PetscScalar       one=1.0,zero=0.0;
1200   /* space to store constraints and their local indices */
1201   PetscScalar       *temp_quadrature_constraint;
1202   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1203   /* iterators */
1204   PetscInt          i,j,k,total_counts,temp_start_ptr;
1205   /* stuff to store connected components stored in pcbddc->mat_graph */
1206   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1207   PetscInt          n_ISForFaces,n_ISForEdges;
1208   /* near null space stuff */
1209   MatNullSpace      nearnullsp;
1210   const Vec         *nearnullvecs;
1211   Vec               *localnearnullsp;
1212   PetscBool         nnsp_has_cnst;
1213   PetscInt          nnsp_size;
1214   PetscScalar       *array;
1215   /* BLAS integers */
1216   PetscBLASInt      lwork,lierr;
1217   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1218   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1219   /* LAPACK working arrays for SVD or POD */
1220   PetscBool         skip_lapack;
1221   PetscScalar       *work;
1222   PetscReal         *singular_vals;
1223 #if defined(PETSC_USE_COMPLEX)
1224   PetscReal         *rwork;
1225 #endif
1226 #if defined(PETSC_MISSING_LAPACK_GESVD)
1227   PetscBLASInt      Blas_one_2=1;
1228   PetscScalar       *temp_basis,*correlation_mat;
1229 #else
1230   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1231   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1232 #endif
1233   /* change of basis */
1234   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1235   PetscBool         boolforchange,qr_needed;
1236   PetscBT           touched,change_basis,qr_needed_idx;
1237   /* auxiliary stuff */
1238   PetscInt          *nnz,*is_indices,*local_to_B;
1239   /* some quantities */
1240   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1241   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1242 
1243 
1244   PetscFunctionBegin;
1245   /* Get index sets for faces, edges and vertices from graph */
1246   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1247     pcbddc->use_vertices = PETSC_TRUE;
1248   }
1249   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1250   /* HACK: provide functions to set change of basis */
1251   if (!ISForVertices && pcbddc->NullSpace) {
1252     pcbddc->use_change_of_basis = PETSC_TRUE;
1253     pcbddc->use_change_on_faces = PETSC_FALSE;
1254   }
1255   /* print some info */
1256   if (pcbddc->dbg_flag) {
1257     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1258     i = 0;
1259     if (ISForVertices) {
1260       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1261     }
1262     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1263     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1264     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1265     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1266   }
1267   /* check if near null space is attached to global mat */
1268   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1269   if (nearnullsp) {
1270     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1271   } else { /* if near null space is not provided BDDC uses constants by default */
1272     nnsp_size = 0;
1273     nnsp_has_cnst = PETSC_TRUE;
1274   }
1275   /* get max number of constraints on a single cc */
1276   max_constraints = nnsp_size;
1277   if (nnsp_has_cnst) max_constraints++;
1278 
1279   /*
1280        Evaluate maximum storage size needed by the procedure
1281        - temp_indices will contain start index of each constraint stored as follows
1282        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1283        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1284        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1285                                                                                                                                                          */
1286   total_counts = n_ISForFaces+n_ISForEdges;
1287   total_counts *= max_constraints;
1288   n_vertices = 0;
1289   if (ISForVertices) {
1290     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1291   }
1292   total_counts += n_vertices;
1293   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1294   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1295   total_counts = 0;
1296   max_size_of_constraint = 0;
1297   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1298     if (i<n_ISForEdges) {
1299       used_IS = &ISForEdges[i];
1300     } else {
1301       used_IS = &ISForFaces[i-n_ISForEdges];
1302     }
1303     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1304     total_counts += j;
1305     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1306   }
1307   total_counts *= max_constraints;
1308   total_counts += n_vertices;
1309   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1310   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1311   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1312   /* local to boundary numbering */
1313   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1314   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1315   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1316   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1317   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1318   /* get local part of global near null space vectors */
1319   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1320   for (k=0;k<nnsp_size;k++) {
1321     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1322     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1323     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1324   }
1325 
1326   /* whether or not to skip lapack calls */
1327   skip_lapack = PETSC_TRUE;
1328   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1329 
1330   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1331   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1332     PetscScalar temp_work;
1333 #if defined(PETSC_MISSING_LAPACK_GESVD)
1334     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1335     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1336     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1337     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1338 #if defined(PETSC_USE_COMPLEX)
1339     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1340 #endif
1341     /* now we evaluate the optimal workspace using query with lwork=-1 */
1342     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1343     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1344     lwork = -1;
1345     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1346 #if !defined(PETSC_USE_COMPLEX)
1347     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1348 #else
1349     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1350 #endif
1351     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1352     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1353 #else /* on missing GESVD */
1354     /* SVD */
1355     PetscInt max_n,min_n;
1356     max_n = max_size_of_constraint;
1357     min_n = max_constraints;
1358     if (max_size_of_constraint < max_constraints) {
1359       min_n = max_size_of_constraint;
1360       max_n = max_constraints;
1361     }
1362     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1363 #if defined(PETSC_USE_COMPLEX)
1364     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1365 #endif
1366     /* now we evaluate the optimal workspace using query with lwork=-1 */
1367     lwork = -1;
1368     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1369     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1370     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1371     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1372 #if !defined(PETSC_USE_COMPLEX)
1373     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1374 #else
1375     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1376 #endif
1377     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1378     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1379 #endif /* on missing GESVD */
1380     /* Allocate optimal workspace */
1381     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1382     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1383   }
1384   /* Now we can loop on constraining sets */
1385   total_counts = 0;
1386   temp_indices[0] = 0;
1387   /* vertices */
1388   if (ISForVertices) {
1389     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1390     if (nnsp_has_cnst) { /* consider all vertices */
1391       for (i=0;i<n_vertices;i++) {
1392         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1393         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1394         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1395         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1396         total_counts++;
1397       }
1398     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1399       PetscBool used_vertex;
1400       for (i=0;i<n_vertices;i++) {
1401         used_vertex = PETSC_FALSE;
1402         k = 0;
1403         while (!used_vertex && k<nnsp_size) {
1404           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1405           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1406             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1407             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1408             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1409             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1410             total_counts++;
1411             used_vertex = PETSC_TRUE;
1412           }
1413           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1414           k++;
1415         }
1416       }
1417     }
1418     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1419     n_vertices = total_counts;
1420   }
1421 
1422   /* edges and faces */
1423   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1424     if (i<n_ISForEdges) {
1425       used_IS = &ISForEdges[i];
1426       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1427     } else {
1428       used_IS = &ISForFaces[i-n_ISForEdges];
1429       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1430     }
1431     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1432     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1433     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1434     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1435     /* change of basis should not be performed on local periodic nodes */
1436     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1437     if (nnsp_has_cnst) {
1438       PetscScalar quad_value;
1439       temp_constraints++;
1440       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1441       for (j=0;j<size_of_constraint;j++) {
1442         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1443         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1444         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1445       }
1446       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1447       total_counts++;
1448     }
1449     for (k=0;k<nnsp_size;k++) {
1450       PetscReal real_value;
1451       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1452       for (j=0;j<size_of_constraint;j++) {
1453         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1454         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1455         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1456       }
1457       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1458       /* check if array is null on the connected component */
1459       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1460       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1461       if (real_value > 0.0) { /* keep indices and values */
1462         temp_constraints++;
1463         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1464         total_counts++;
1465       }
1466     }
1467     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1468     valid_constraints = temp_constraints;
1469     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1470     if (!pcbddc->use_nnsp_true && temp_constraints) {
1471       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1472 
1473 #if defined(PETSC_MISSING_LAPACK_GESVD)
1474       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1475          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1476          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1477             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1478             from that computed using LAPACKgesvd
1479          -> This is due to a different computation of eigenvectors in LAPACKheev
1480          -> The quality of the POD-computed basis will be the same */
1481       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1482       /* Store upper triangular part of correlation matrix */
1483       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1484       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1485       for (j=0;j<temp_constraints;j++) {
1486         for (k=0;k<j+1;k++) {
1487           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1488         }
1489       }
1490       /* compute eigenvalues and eigenvectors of correlation matrix */
1491       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1492       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1493 #if !defined(PETSC_USE_COMPLEX)
1494       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1495 #else
1496       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1497 #endif
1498       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1499       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1500       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1501       j=0;
1502       while (j < temp_constraints && singular_vals[j] < tol) j++;
1503       total_counts=total_counts-j;
1504       valid_constraints = temp_constraints-j;
1505       /* scale and copy POD basis into used quadrature memory */
1506       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1507       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1508       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1509       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1510       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1511       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1512       if (j<temp_constraints) {
1513         PetscInt ii;
1514         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1515         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1516         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1517         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1518         for (k=0;k<temp_constraints-j;k++) {
1519           for (ii=0;ii<size_of_constraint;ii++) {
1520             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1521           }
1522         }
1523       }
1524 #else  /* on missing GESVD */
1525       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1526       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1527       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1528       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1529 #if !defined(PETSC_USE_COMPLEX)
1530       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1531 #else
1532       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1533 #endif
1534       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1535       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1536       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1537       k = temp_constraints;
1538       if (k > size_of_constraint) k = size_of_constraint;
1539       j = 0;
1540       while (j < k && singular_vals[k-j-1] < tol) j++;
1541       total_counts = total_counts-temp_constraints+k-j;
1542       valid_constraints = k-j;
1543 #endif /* on missing GESVD */
1544     }
1545     /* setting change_of_basis flag is safe now */
1546     if (boolforchange) {
1547       for (j=0;j<valid_constraints;j++) {
1548         PetscBTSet(change_basis,total_counts-j-1);
1549       }
1550     }
1551   }
1552   /* free index sets of faces, edges and vertices */
1553   for (i=0;i<n_ISForFaces;i++) {
1554     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1555   }
1556   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1557   for (i=0;i<n_ISForEdges;i++) {
1558     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1559   }
1560   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1561   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1562 
1563   /* free workspace */
1564   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1565     ierr = PetscFree(work);CHKERRQ(ierr);
1566 #if defined(PETSC_USE_COMPLEX)
1567     ierr = PetscFree(rwork);CHKERRQ(ierr);
1568 #endif
1569     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1570 #if defined(PETSC_MISSING_LAPACK_GESVD)
1571     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1572     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1573 #endif
1574   }
1575   for (k=0;k<nnsp_size;k++) {
1576     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1577   }
1578   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1579 
1580   /* set quantities in pcbddc data structure */
1581   /* n_vertices defines the number of subdomain corners in the primal space */
1582   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1583   pcbddc->local_primal_size = total_counts;
1584   pcbddc->n_vertices = n_vertices;
1585   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1586 
1587   /* Create constraint matrix */
1588   /* The constraint matrix is used to compute the l2g map of primal dofs */
1589   /* so we need to set it up properly either with or without change of basis */
1590   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1591   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1592   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1593   /* array to compute a local numbering of constraints : vertices first then constraints */
1594   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1595   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1596   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1597   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1598   /* auxiliary stuff for basis change */
1599   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1600   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1601 
1602   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1603   total_primal_vertices=0;
1604   for (i=0;i<pcbddc->local_primal_size;i++) {
1605     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1606     if (size_of_constraint == 1) {
1607       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1608       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1609       aux_primal_minloc[total_primal_vertices]=0;
1610       total_primal_vertices++;
1611     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1612       PetscInt min_loc,min_index;
1613       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1614       /* find first untouched local node */
1615       k = 0;
1616       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1617       min_index = global_indices[k];
1618       min_loc = k;
1619       /* search the minimum among global nodes already untouched on the cc */
1620       for (k=1;k<size_of_constraint;k++) {
1621         /* there can be more than one constraint on a single connected component */
1622         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1623           min_index = global_indices[k];
1624           min_loc = k;
1625         }
1626       }
1627       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1628       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1629       aux_primal_minloc[total_primal_vertices]=min_loc;
1630       total_primal_vertices++;
1631     }
1632   }
1633   /* determine if a QR strategy is needed for change of basis */
1634   qr_needed = PETSC_FALSE;
1635   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1636   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1637     if (PetscBTLookup(change_basis,i)) {
1638       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1639       j = 0;
1640       for (k=0;k<size_of_constraint;k++) {
1641         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1642           j++;
1643         }
1644       }
1645       /* found more than one primal dof on the cc */
1646       if (j > 1) {
1647         PetscBTSet(qr_needed_idx,i);
1648         qr_needed = PETSC_TRUE;
1649       }
1650     }
1651   }
1652   /* free workspace */
1653   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1654 
1655   /* permute indices in order to have a sorted set of vertices */
1656   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1657 
1658   /* nonzero structure of constraint matrix */
1659   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1660   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1661   j=total_primal_vertices;
1662   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1663     if (!PetscBTLookup(change_basis,i)) {
1664       nnz[j]=temp_indices[i+1]-temp_indices[i];
1665       j++;
1666     }
1667   }
1668   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1669   ierr = PetscFree(nnz);CHKERRQ(ierr);
1670   /* set values in constraint matrix */
1671   for (i=0;i<total_primal_vertices;i++) {
1672     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1673   }
1674   total_counts = total_primal_vertices;
1675   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1676     if (!PetscBTLookup(change_basis,i)) {
1677       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1678       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1679       total_counts++;
1680     }
1681   }
1682   /* assembling */
1683   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1684   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1685   /*
1686   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1687   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1688   */
1689   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1690   if (pcbddc->use_change_of_basis) {
1691     /* dual and primal dofs on a single cc */
1692     PetscInt     dual_dofs,primal_dofs;
1693     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1694     PetscInt     primal_counter;
1695     /* working stuff for GEQRF */
1696     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1697     PetscBLASInt lqr_work;
1698     /* working stuff for UNGQR */
1699     PetscScalar  *gqr_work,lgqr_work_t;
1700     PetscBLASInt lgqr_work;
1701     /* working stuff for TRTRS */
1702     PetscScalar  *trs_rhs;
1703     PetscBLASInt Blas_NRHS;
1704     /* pointers for values insertion into change of basis matrix */
1705     PetscInt     *start_rows,*start_cols;
1706     PetscScalar  *start_vals;
1707     /* working stuff for values insertion */
1708     PetscBT      is_primal;
1709 
1710     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1711     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1712     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1713     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1714     /* work arrays */
1715     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1716     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1717     /* nonzeros per row */
1718     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1719       if (PetscBTLookup(change_basis,i)) {
1720         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1721         if (PetscBTLookup(qr_needed_idx,i)) {
1722           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1723         } else {
1724           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1725           /* get local primal index on the cc */
1726           j = 0;
1727           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1728           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1729         }
1730       }
1731     }
1732     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1733     ierr = PetscFree(nnz);CHKERRQ(ierr);
1734     /* Set initial identity in the matrix */
1735     for (i=0;i<pcis->n_B;i++) {
1736       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1737     }
1738 
1739     if (pcbddc->dbg_flag) {
1740       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1741       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1742     }
1743 
1744 
1745     /* Now we loop on the constraints which need a change of basis */
1746     /*
1747        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1748        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1749 
1750        Basic blocks of change of basis matrix T computed
1751 
1752           - Using the following block transformation if there is only a primal dof on the cc
1753             (in the example, primal dof is the last one of the edge in LOCAL ordering
1754              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1755             | 1        0   ...        0     1 |
1756             | 0        1   ...        0     1 |
1757             |              ...                |
1758             | 0        ...            1     1 |
1759             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1760 
1761           - via QR decomposition of constraints otherwise
1762     */
1763     if (qr_needed) {
1764       /* space to store Q */
1765       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1766       /* first we issue queries for optimal work */
1767       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1768       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1769       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1770       lqr_work = -1;
1771       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1772       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1773       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1774       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1775       lgqr_work = -1;
1776       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1777       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1778       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1779       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1780       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1781       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1782       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1783       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1784       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1785       /* array to store scaling factors for reflectors */
1786       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1787       /* array to store rhs and solution of triangular solver */
1788       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1789       /* allocating workspace for check */
1790       if (pcbddc->dbg_flag) {
1791         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1792       }
1793     }
1794     /* array to store whether a node is primal or not */
1795     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1796     for (i=0;i<total_primal_vertices;i++) {
1797       ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1798     }
1799 
1800     /* loop on constraints and see whether or not they need a change of basis and compute it */
1801     /* -> using implicit ordering contained in temp_indices data */
1802     total_counts = pcbddc->n_vertices;
1803     primal_counter = total_counts;
1804     while (total_counts<pcbddc->local_primal_size) {
1805       primal_dofs = 1;
1806       if (PetscBTLookup(change_basis,total_counts)) {
1807         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1808         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1809           primal_dofs++;
1810         }
1811         /* get constraint info */
1812         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1813         dual_dofs = size_of_constraint-primal_dofs;
1814 
1815         if (pcbddc->dbg_flag) {
1816           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1817         }
1818 
1819         if (primal_dofs > 1) { /* QR */
1820 
1821           /* copy quadrature constraints for change of basis check */
1822           if (pcbddc->dbg_flag) {
1823             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1824           }
1825           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1826           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1827 
1828           /* compute QR decomposition of constraints */
1829           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1830           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1831           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1832           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1833           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1834           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1835           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1836 
1837           /* explictly compute R^-T */
1838           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1839           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1840           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1841           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1842           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1843           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1844           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1845           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1846           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1847           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1848 
1849           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1850           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1851           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1852           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1853           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1854           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1855           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1856           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1857           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1858 
1859           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1860              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1861              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1862           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1863           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1864           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1865           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1866           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1867           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1868           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1869           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1870           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1871           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1872 
1873           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1874           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1875           /* insert cols for primal dofs */
1876           for (j=0;j<primal_dofs;j++) {
1877             start_vals = &qr_basis[j*size_of_constraint];
1878             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1879             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1880           }
1881           /* insert cols for dual dofs */
1882           for (j=0,k=0;j<dual_dofs;k++) {
1883             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1884               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1885               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1886               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1887               j++;
1888             }
1889           }
1890 
1891           /* check change of basis */
1892           if (pcbddc->dbg_flag) {
1893             PetscInt   ii,jj;
1894             PetscBool valid_qr=PETSC_TRUE;
1895             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1896             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1897             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1898             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1899             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1900             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1901             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1902             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1903             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1904             for (jj=0;jj<size_of_constraint;jj++) {
1905               for (ii=0;ii<primal_dofs;ii++) {
1906                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1907                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1908               }
1909             }
1910             if (!valid_qr) {
1911               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1912               for (jj=0;jj<size_of_constraint;jj++) {
1913                 for (ii=0;ii<primal_dofs;ii++) {
1914                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1915                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1916                   }
1917                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1918                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1919                   }
1920                 }
1921               }
1922             } else {
1923               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1924             }
1925           }
1926         } else { /* simple transformation block */
1927           PetscInt row,col;
1928           PetscScalar val;
1929           for (j=0;j<size_of_constraint;j++) {
1930             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
1931             if (!PetscBTLookup(is_primal,row)) {
1932               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1933               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
1934               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
1935             } else {
1936               for (k=0;k<size_of_constraint;k++) {
1937                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1938                 if (row != col) {
1939                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
1940                 } else {
1941                   val = 1.0;
1942                 }
1943                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
1944               }
1945             }
1946           }
1947         }
1948         /* increment primal counter */
1949         primal_counter += primal_dofs;
1950       } else {
1951         if (pcbddc->dbg_flag) {
1952           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1953         }
1954       }
1955       /* increment constraint counter total_counts */
1956       total_counts += primal_dofs;
1957     }
1958 
1959     /* free workspace */
1960     if (qr_needed) {
1961       if (pcbddc->dbg_flag) {
1962         ierr = PetscFree(work);CHKERRQ(ierr);
1963       }
1964       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1965       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1966       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1967       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1968       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1969     }
1970     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1971     /* assembling */
1972     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1973     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1974     /*
1975     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1976     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1977     */
1978   }
1979 
1980   /* flush dbg viewer */
1981   if (pcbddc->dbg_flag) {
1982     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1983   }
1984 
1985   /* free workspace */
1986   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1987   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
1988   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1989   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1990   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1991   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1992   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1993   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1994   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1995   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1996   PetscFunctionReturn(0);
1997 }
1998 
1999 #undef __FUNCT__
2000 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2001 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2002 {
2003   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2004   PC_IS       *pcis = (PC_IS*)pc->data;
2005   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2006   PetscInt    bs,ierr,i,vertex_size;
2007   PetscViewer viewer=pcbddc->dbg_viewer;
2008 
2009   PetscFunctionBegin;
2010   /* Init local Graph struct */
2011   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2012 
2013   /* Check validity of the csr graph passed in by the user */
2014   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2015     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2016   }
2017 
2018   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2019   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2020     Mat mat_adj;
2021     const PetscInt *xadj,*adjncy;
2022     PetscBool flg_row=PETSC_TRUE;
2023 
2024     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2025     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2026     if (!flg_row) {
2027       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2028     }
2029     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2030     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2031     if (!flg_row) {
2032       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2033     }
2034     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2035   }
2036 
2037   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
2038   vertex_size = 1;
2039   if (!pcbddc->user_provided_isfordofs) {
2040     if (!pcbddc->n_ISForDofs) {
2041       IS *custom_ISForDofs;
2042 
2043       ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
2044       ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
2045       for (i=0;i<bs;i++) {
2046         ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
2047       }
2048       ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
2049       pcbddc->user_provided_isfordofs = PETSC_FALSE;
2050       /* remove my references to IS objects */
2051       for (i=0;i<bs;i++) {
2052         ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
2053       }
2054       ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
2055     }
2056   } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2057     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2058   }
2059 
2060   /* Setup of Graph */
2061   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
2062 
2063   /* Graph's connected components analysis */
2064   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2065 
2066   /* print some info to stdout */
2067   if (pcbddc->dbg_flag) {
2068     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2069   }
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 #undef __FUNCT__
2074 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2075 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2076 {
2077   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2078   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2079   PetscErrorCode ierr;
2080 
2081   PetscFunctionBegin;
2082   n = 0;
2083   vertices = 0;
2084   if (pcbddc->ConstraintMatrix) {
2085     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2086     for (i=0;i<local_primal_size;i++) {
2087       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2088       if (size_of_constraint == 1) n++;
2089       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2090     }
2091     if (vertices_idx) {
2092       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2093       n = 0;
2094       for (i=0;i<local_primal_size;i++) {
2095         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2096         if (size_of_constraint == 1) {
2097           vertices[n++]=row_cmat_indices[0];
2098         }
2099         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2100       }
2101     }
2102   }
2103   *n_vertices = n;
2104   if (vertices_idx) *vertices_idx = vertices;
2105   PetscFunctionReturn(0);
2106 }
2107 
2108 #undef __FUNCT__
2109 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2110 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2111 {
2112   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2113   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2114   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2115   PetscBT        touched;
2116   PetscErrorCode ierr;
2117 
2118     /* This function assumes that the number of local constraints per connected component
2119        is not greater than the number of nodes defined for the connected component
2120        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2121   PetscFunctionBegin;
2122   n = 0;
2123   constraints_index = 0;
2124   if (pcbddc->ConstraintMatrix) {
2125     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2126     max_size_of_constraint = 0;
2127     for (i=0;i<local_primal_size;i++) {
2128       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2129       if (size_of_constraint > 1) {
2130         n++;
2131       }
2132       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2133       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2134     }
2135     if (constraints_idx) {
2136       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2137       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2138       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2139       n = 0;
2140       for (i=0;i<local_primal_size;i++) {
2141         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2142         if (size_of_constraint > 1) {
2143           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2144           /* find first untouched local node */
2145           j = 0;
2146           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2147           min_index = row_cmat_global_indices[j];
2148           min_loc = j;
2149           /* search the minimum among nodes not yet touched on the connected component
2150              since there can be more than one constraint on a single cc */
2151           for (j=1;j<size_of_constraint;j++) {
2152             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2153               min_index = row_cmat_global_indices[j];
2154               min_loc = j;
2155             }
2156           }
2157           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2158           constraints_index[n++] = row_cmat_indices[min_loc];
2159         }
2160         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2161       }
2162       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2163       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2164     }
2165   }
2166   *n_constraints = n;
2167   if (constraints_idx) *constraints_idx = constraints_index;
2168   PetscFunctionReturn(0);
2169 }
2170 
2171 /* the next two functions has been adapted from pcis.c */
2172 #undef __FUNCT__
2173 #define __FUNCT__ "PCBDDCApplySchur"
2174 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2175 {
2176   PetscErrorCode ierr;
2177   PC_IS          *pcis = (PC_IS*)(pc->data);
2178 
2179   PetscFunctionBegin;
2180   if (!vec2_B) { vec2_B = v; }
2181   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2182   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2183   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2184   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2185   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2186   PetscFunctionReturn(0);
2187 }
2188 
2189 #undef __FUNCT__
2190 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2191 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2192 {
2193   PetscErrorCode ierr;
2194   PC_IS          *pcis = (PC_IS*)(pc->data);
2195 
2196   PetscFunctionBegin;
2197   if (!vec2_B) { vec2_B = v; }
2198   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2199   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2200   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2201   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2202   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2203   PetscFunctionReturn(0);
2204 }
2205 
2206 #undef __FUNCT__
2207 #define __FUNCT__ "PCBDDCSubsetNumbering"
2208 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2209 {
2210   Vec            local_vec,global_vec;
2211   IS             seqis,paris;
2212   VecScatter     scatter_ctx;
2213   PetscScalar    *array;
2214   PetscInt       *temp_global_dofs;
2215   PetscScalar    globalsum;
2216   PetscInt       i,j,s;
2217   PetscInt       nlocals,first_index,old_index,max_local;
2218   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2219   PetscMPIInt    *dof_sizes,*dof_displs;
2220   PetscBool      first_found;
2221   PetscErrorCode ierr;
2222 
2223   PetscFunctionBegin;
2224   /* mpi buffers */
2225   MPI_Comm_size(comm,&size_prec_comm);
2226   MPI_Comm_rank(comm,&rank_prec_comm);
2227   j = ( !rank_prec_comm ? size_prec_comm : 0);
2228   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2229   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2230   /* get maximum size of subset */
2231   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2232   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2233   max_local = 0;
2234   if (n_local_dofs) {
2235     max_local = temp_global_dofs[0];
2236     for (i=1;i<n_local_dofs;i++) {
2237       if (max_local < temp_global_dofs[i] ) {
2238         max_local = temp_global_dofs[i];
2239       }
2240     }
2241   }
2242   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2243   max_global++;
2244   max_local = 0;
2245   if (n_local_dofs) {
2246     max_local = local_dofs[0];
2247     for (i=1;i<n_local_dofs;i++) {
2248       if (max_local < local_dofs[i] ) {
2249         max_local = local_dofs[i];
2250       }
2251     }
2252   }
2253   max_local++;
2254   /* allocate workspace */
2255   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2256   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2257   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2258   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2259   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2260   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2261   /* create scatter */
2262   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2263   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2264   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2265   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2266   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2267   /* init array */
2268   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2269   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2270   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2271   if (local_dofs_mult) {
2272     for (i=0;i<n_local_dofs;i++) {
2273       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2274     }
2275   } else {
2276     for (i=0;i<n_local_dofs;i++) {
2277       array[local_dofs[i]]=1.0;
2278     }
2279   }
2280   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2281   /* scatter into global vec and get total number of global dofs */
2282   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2283   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2284   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2285   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2286   /* Fill global_vec with cumulative function for global numbering */
2287   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2288   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2289   nlocals = 0;
2290   first_index = -1;
2291   first_found = PETSC_FALSE;
2292   for (i=0;i<s;i++) {
2293     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2294       first_found = PETSC_TRUE;
2295       first_index = i;
2296     }
2297     nlocals += (PetscInt)PetscRealPart(array[i]);
2298   }
2299   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2300   if (!rank_prec_comm) {
2301     dof_displs[0]=0;
2302     for (i=1;i<size_prec_comm;i++) {
2303       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2304     }
2305   }
2306   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2307   if (first_found) {
2308     array[first_index] += (PetscScalar)nlocals;
2309     old_index = first_index;
2310     for (i=first_index+1;i<s;i++) {
2311       if (PetscRealPart(array[i]) > 0.0) {
2312         array[i] += array[old_index];
2313         old_index = i;
2314       }
2315     }
2316   }
2317   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2318   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2319   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2320   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2321   /* get global ordering of local dofs */
2322   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2323   if (local_dofs_mult) {
2324     for (i=0;i<n_local_dofs;i++) {
2325       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2326     }
2327   } else {
2328     for (i=0;i<n_local_dofs;i++) {
2329       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2330     }
2331   }
2332   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2333   /* free workspace */
2334   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2335   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2336   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2337   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2338   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2339   /* return pointer to global ordering of local dofs */
2340   *global_numbering_subset = temp_global_dofs;
2341   PetscFunctionReturn(0);
2342 }
2343 
2344 #undef __FUNCT__
2345 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2346 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2347 {
2348   PetscInt       i,j;
2349   PetscScalar    *alphas;
2350   PetscErrorCode ierr;
2351 
2352   PetscFunctionBegin;
2353   /* this implements stabilized Gram-Schmidt */
2354   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2355   for (i=0;i<n;i++) {
2356     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2357     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2358     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2359   }
2360   ierr = PetscFree(alphas);CHKERRQ(ierr);
2361   PetscFunctionReturn(0);
2362 }
2363 
2364 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2365 #undef __FUNCT__
2366 #define __FUNCT__ "MatConvert_IS_AIJ"
2367 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2368 {
2369   Mat new_mat;
2370   Mat_IS *matis = (Mat_IS*)(mat->data);
2371   /* info on mat */
2372   PetscInt bs,rows,cols;
2373   PetscInt lrows,lcols;
2374   PetscInt local_rows,local_cols;
2375   PetscMPIInt nsubdomains;
2376   /* preallocation */
2377   Vec vec_dnz,vec_onz;
2378   PetscScalar *my_dnz,*my_onz,*array;
2379   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2380   PetscInt  index_row,index_col,owner;
2381   PetscInt  *local_indices,*global_indices;
2382   /* work */
2383   PetscInt i,j;
2384   PetscErrorCode ierr;
2385 
2386   PetscFunctionBegin;
2387   /* CHECKS */
2388   /* get info from mat */
2389   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2390   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2391   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2392   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2393 
2394   /* MAT_INITIAL_MATRIX */
2395   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2396   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2397   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2398   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2399   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2400   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2401 
2402   /*
2403     preallocation
2404   */
2405 
2406   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2407   /*
2408      Some vectors are needed to sum up properly on shared interface dofs.
2409      Note that preallocation is not exact, since it overestimates nonzeros
2410   */
2411 /*
2412   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2413   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2414   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2415   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2416 */
2417   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2418   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2419   /* All processes need to compute entire row ownership */
2420   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2421   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2422   for (i=0;i<nsubdomains;i++) {
2423     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2424       row_ownership[j]=i;
2425     }
2426   }
2427   /* map indices of local mat to global values */
2428   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2429   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2430   for (i=0;i<local_rows;i++) local_indices[i]=i;
2431   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2432   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2433 
2434   /*
2435      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2436      then, they will be summed up properly. This way, preallocation is always sufficient
2437   */
2438   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2439   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2440   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2441   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2442   for (i=0;i<local_rows;i++) {
2443     index_row = global_indices[i];
2444     for (j=i;j<local_rows;j++) {
2445       owner = row_ownership[index_row];
2446       index_col = global_indices[j];
2447       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2448         my_dnz[i] += 1.0;
2449       } else { /* offdiag block */
2450         my_onz[i] += 1.0;
2451       }
2452       /* same as before, interchanging rows and cols */
2453       if (i != j) {
2454         owner = row_ownership[index_col];
2455         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2456           my_dnz[j] += 1.0;
2457         } else {
2458           my_onz[j] += 1.0;
2459         }
2460       }
2461     }
2462   }
2463   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2464   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2465   if (local_rows) { /* multilevel guard */
2466     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2467     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2468   }
2469   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2470   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2471   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2472   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2473   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2474   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2475   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2476 
2477   /* set computed preallocation in dnz and onz */
2478   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2479   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2480   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2481   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2482   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2483   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2484   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2485   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2486 
2487   /* Resize preallocation if overestimated */
2488   for (i=0;i<lrows;i++) {
2489     dnz[i] = PetscMin(dnz[i],lcols);
2490     onz[i] = PetscMin(onz[i],cols-lcols);
2491   }
2492   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2493   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2494 
2495   /*
2496     Set values. Very Basic.
2497   */
2498   for (i=0;i<local_rows;i++) {
2499     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2500     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2501     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2502     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2503     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2504   }
2505   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2506   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2507   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2508 
2509   /* get back mat */
2510   *M = new_mat;
2511   PetscFunctionReturn(0);
2512 }
2513 
2514 #undef __FUNCT__
2515 #define __FUNCT__ "MatISSubassemble_Private"
2516 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2517 {
2518   Mat             subdomain_adj;
2519   IS              new_ranks,ranks_send_to;
2520   MatPartitioning partitioner;
2521   Mat_IS          *matis;
2522   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2523   PetscInt        prank;
2524   PetscMPIInt     size,rank,color;
2525   PetscInt        *xadj,*adjncy,*oldranks;
2526   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2527   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2528   PetscErrorCode  ierr;
2529   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2530   PetscSubcomm    subcomm;
2531 
2532   PetscFunctionBegin;
2533   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2534   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2535   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2536 
2537   /* Get info on mapping */
2538   matis = (Mat_IS*)(mat->data);
2539   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2540   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2541 
2542   /* build local CSR graph of subdomains' connectivity */
2543   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2544   xadj[0] = 0;
2545   xadj[1] = PetscMax(n_neighs-1,0);
2546   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2547   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2548 
2549   if (threshold) {
2550     PetscInt* count,min_threshold;
2551     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2552     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2553     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2554       for (j=0;j<n_shared[i];j++) {
2555         count[shared[i][j]] += 1;
2556       }
2557     }
2558     /* adapt threshold since we dont want to lose significant connections */
2559     min_threshold = n_neighs;
2560     for (i=1;i<n_neighs;i++) {
2561       for (j=0;j<n_shared[i];j++) {
2562         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2563       }
2564     }
2565     PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold);
2566     threshold = PetscMax(min_threshold+1,threshold);
2567     PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold);
2568     xadj[1] = 0;
2569     for (i=1;i<n_neighs;i++) {
2570       for (j=0;j<n_shared[i];j++) {
2571         if (count[shared[i][j]] < threshold) {
2572           adjncy[xadj[1]] = neighs[i];
2573           adjncy_wgt[xadj[1]] = n_shared[i];
2574           xadj[1]++;
2575           break;
2576         }
2577       }
2578     }
2579     ierr = PetscFree(count);CHKERRQ(ierr);
2580   } else {
2581     if (xadj[1]) {
2582       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2583       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2584     }
2585   }
2586   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2587   if (use_square) {
2588     for (i=0;i<xadj[1];i++) {
2589       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2590     }
2591   }
2592   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2593 
2594   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2595 
2596   /*
2597     Restrict work on active processes only.
2598   */
2599   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2600   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2601   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2602   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2603   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2604   if (color) {
2605     ierr = PetscFree(xadj);CHKERRQ(ierr);
2606     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2607     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2608   } else {
2609     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2610     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2611     prank = rank;
2612     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2613     for (i=0;i<size;i++) {
2614       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2615     }
2616     for (i=0;i<xadj[1];i++) {
2617       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2618     }
2619     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2620     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2621     n_subdomains = (PetscInt)size;
2622     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2623 
2624     /* Partition */
2625     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2626     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2627     if (use_vwgt) {
2628       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2629       v_wgt[0] = local_size;
2630       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2631     }
2632     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2633     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2634     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2635     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2636     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2637 
2638     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2639     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2640     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2641     /* clean up */
2642     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2643     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2644     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2645     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2646   }
2647   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2648 
2649   /* assemble parallel IS for sends */
2650   i = 1;
2651   if (color) i=0;
2652   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2653   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2654 
2655   /* get back IS */
2656   *is_sends = ranks_send_to;
2657   PetscFunctionReturn(0);
2658 }
2659 
2660 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2661 
2662 #undef __FUNCT__
2663 #define __FUNCT__ "MatISSubassemble"
2664 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2665 {
2666   Mat                    local_mat,new_mat;
2667   Mat_IS                 *matis;
2668   IS                     is_sends_internal;
2669   PetscInt               rows,cols;
2670   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2671   PetscBool              ismatis,isdense;
2672   ISLocalToGlobalMapping l2gmap;
2673   PetscInt*              l2gmap_indices;
2674   const PetscInt*        is_indices;
2675   MatType                new_local_type;
2676   MatTypePrivate         new_local_type_private;
2677   /* buffers */
2678   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2679   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2680   /* MPI */
2681   MPI_Comm               comm;
2682   PetscMPIInt            n_sends,n_recvs,commsize;
2683   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2684   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2685   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2686   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2687   PetscErrorCode         ierr;
2688 
2689   PetscFunctionBegin;
2690   /* checks */
2691   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2692   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2693   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2694   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2695   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2696   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2697   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2698   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2699   PetscValidLogicalCollectiveInt(mat,bs,0);
2700   /* prepare IS for sending if not provided */
2701   if (!is_sends) {
2702     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2703     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2704   } else {
2705     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2706     is_sends_internal = is_sends;
2707   }
2708 
2709   /* get pointer of MATIS data */
2710   matis = (Mat_IS*)mat->data;
2711 
2712   /* get comm */
2713   comm = PetscObjectComm((PetscObject)mat);
2714 
2715   /* compute number of sends */
2716   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2717   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2718 
2719   /* compute number of receives */
2720   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2721   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2722   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2723   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2724   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2725   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2726   ierr = PetscFree(iflags);CHKERRQ(ierr);
2727 
2728   /* prepare send/receive buffers */
2729   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2730   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2731   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2732   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2733 
2734   /* Get data from local mat */
2735   if (!isdense) {
2736     /* TODO: See below some guidelines on how to prepare the local buffers */
2737     /*
2738        send_buffer_vals should contain the raw values of the local matrix
2739        send_buffer_idxs should contain:
2740        - MatType_PRIVATE type
2741        - PetscInt        size_of_l2gmap
2742        - PetscInt        global_row_indices[size_of_l2gmap]
2743        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2744     */
2745   } else {
2746     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2747     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2748     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2749     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2750     send_buffer_idxs[1] = i;
2751     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2752     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2753     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2754     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2755     for (i=0;i<n_sends;i++) {
2756       ilengths_vals[is_indices[i]] = len*len;
2757       ilengths_idxs[is_indices[i]] = len+2;
2758     }
2759   }
2760   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2761   buf_size_idxs = 0;
2762   buf_size_vals = 0;
2763   for (i=0;i<n_recvs;i++) {
2764     buf_size_idxs += (PetscInt)olengths_idxs[i];
2765     buf_size_vals += (PetscInt)olengths_vals[i];
2766   }
2767   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2768   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2769 
2770   /* get new tags for clean communications */
2771   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2772   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2773 
2774   /* allocate for requests */
2775   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2776   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2777   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2778   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2779 
2780   /* communications */
2781   ptr_idxs = recv_buffer_idxs;
2782   ptr_vals = recv_buffer_vals;
2783   for (i=0;i<n_recvs;i++) {
2784     source_dest = onodes[i];
2785     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2786     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2787     ptr_idxs += olengths_idxs[i];
2788     ptr_vals += olengths_vals[i];
2789   }
2790   for (i=0;i<n_sends;i++) {
2791     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2792     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2793     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2794   }
2795   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2796   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2797 
2798   /* assemble new l2g map */
2799   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2800   ptr_idxs = recv_buffer_idxs;
2801   buf_size_idxs = 0;
2802   for (i=0;i<n_recvs;i++) {
2803     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2804     ptr_idxs += olengths_idxs[i];
2805   }
2806   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2807   ptr_idxs = recv_buffer_idxs;
2808   buf_size_idxs = 0;
2809   for (i=0;i<n_recvs;i++) {
2810     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2811     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2812     ptr_idxs += olengths_idxs[i];
2813   }
2814   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2815   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2816   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2817 
2818   /* infer new local matrix type from received local matrices type */
2819   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2820   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2821   new_local_type_private = MATAIJ_PRIVATE;
2822   new_local_type = MATSEQAIJ;
2823   if (n_recvs) {
2824     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
2825     ptr_idxs = recv_buffer_idxs;
2826     for (i=0;i<n_recvs;i++) {
2827       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2828         new_local_type_private = MATAIJ_PRIVATE;
2829         break;
2830       }
2831       ptr_idxs += olengths_idxs[i];
2832     }
2833     switch (new_local_type_private) {
2834       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2835         new_local_type = MATSEQAIJ;
2836         bs = 1;
2837         break;
2838       case MATAIJ_PRIVATE:
2839         new_local_type = MATSEQAIJ;
2840         bs = 1;
2841         break;
2842       case MATBAIJ_PRIVATE:
2843         new_local_type = MATSEQBAIJ;
2844         break;
2845       case MATSBAIJ_PRIVATE:
2846         new_local_type = MATSEQSBAIJ;
2847         break;
2848       default:
2849         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2850         break;
2851     }
2852   }
2853 
2854   /* create MATIS object */
2855   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2856   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2857   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2858   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2859   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2860   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2861 
2862   /* set values */
2863   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2864   ptr_vals = recv_buffer_vals;
2865   ptr_idxs = recv_buffer_idxs;
2866   for (i=0;i<n_recvs;i++) {
2867     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2868       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2869       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2870       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2871       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2872       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2873     }
2874     ptr_idxs += olengths_idxs[i];
2875     ptr_vals += olengths_vals[i];
2876   }
2877   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2878   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2879   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2880   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2881 
2882   { /* check */
2883     Vec       lvec,rvec;
2884     PetscReal infty_error;
2885 
2886     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
2887     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
2888     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
2889     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
2890     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
2891     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2892     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
2893     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
2894     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
2895   }
2896 
2897   /* free workspace */
2898   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
2899   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
2900   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2901   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
2902   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2903   if (isdense) {
2904     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2905     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2906   } else {
2907     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
2908   }
2909   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
2910   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
2911   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
2912   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
2913   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
2914   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
2915   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
2916   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
2917   ierr = PetscFree(onodes);CHKERRQ(ierr);
2918   /* get back new mat */
2919   *mat_n = new_mat;
2920   PetscFunctionReturn(0);
2921 }
2922 
2923 #undef __FUNCT__
2924 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2925 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2926 {
2927   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
2928   PC_IS                  *pcis = (PC_IS*)pc->data;
2929   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
2930   MatNullSpace           CoarseNullSpace=NULL;
2931   ISLocalToGlobalMapping coarse_islg;
2932   IS                     coarse_is;
2933   PetscInt               max_it,coarse_size,*local_primal_indices=NULL;
2934   PetscInt               im_active=-1,active_procs=-1;
2935   PC                     pc_temp;
2936   PCType                 coarse_pc_type;
2937   KSPType                coarse_ksp_type;
2938   PetscBool              multilevel_requested,multilevel_allowed;
2939   PetscBool              setsym,issym,isbddc,isnn;
2940   MatStructure           matstruct;
2941   PetscErrorCode         ierr;
2942 
2943   PetscFunctionBegin;
2944   /* Assign global numbering to coarse dofs */
2945   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2946 
2947   /* infer some info from user */
2948   issym = PETSC_FALSE;
2949   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2950   multilevel_allowed = PETSC_FALSE;
2951   multilevel_requested = PETSC_FALSE;
2952   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
2953   if (multilevel_requested) {
2954     /* count "active processes" */
2955     im_active = !!(pcis->n);
2956     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2957     if (active_procs/pcbddc->coarsening_ratio < 2) {
2958       multilevel_allowed = PETSC_FALSE;
2959     } else {
2960       multilevel_allowed = PETSC_TRUE;
2961     }
2962   }
2963 
2964   /* set defaults for coarse KSP and PC */
2965   if (multilevel_allowed) {
2966     if (issym) {
2967       coarse_ksp_type = KSPRICHARDSON;
2968     } else {
2969       coarse_ksp_type = KSPCHEBYSHEV;
2970     }
2971     coarse_pc_type = PCBDDC;
2972   } else {
2973     coarse_ksp_type = KSPPREONLY;
2974     coarse_pc_type = PCREDUNDANT;
2975   }
2976 
2977   /* create the coarse KSP object only once with defaults */
2978   if (!pcbddc->coarse_ksp) {
2979     char prefix[256],str_level[3];
2980     size_t len;
2981     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
2982     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
2983     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
2984     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2985     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2986     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2987     /* prefix */
2988     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
2989     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
2990     if (!pcbddc->current_level) {
2991       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2992       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
2993     } else {
2994       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
2995       if (pcbddc->current_level>1) len -= 2;
2996       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
2997       *(prefix+len)='\0';
2998       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
2999       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3000     }
3001     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3002   }
3003   /* allow user customization */
3004   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3005   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3006   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3007 
3008   /* get some info after set from options */
3009   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3010   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3011   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3012   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3013     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3014     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3015     isbddc = PETSC_FALSE;
3016   }
3017 
3018   /* propagate BDDC info to the next level */
3019   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3020   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3021   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3022 
3023   /* creates temporary MATIS object for coarse matrix */
3024   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3025   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3026   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3027   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3028   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3029   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3030   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3031   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3032   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3033   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
3034 
3035   /* assemble coarse matrix */
3036   if (isbddc || isnn) {
3037     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
3038   } else {
3039     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3040   }
3041   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3042 
3043   /* create local to global scatters for coarse problem */
3044   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3045   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3046   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3047 
3048   /* propagate symmetry info to coarse matrix */
3049   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3050 
3051   /* Compute coarse null space (special handling by BDDC only) */
3052   if (pcbddc->NullSpace) {
3053     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3054     if (isbddc) {
3055       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3056     } else {
3057       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3058     }
3059   }
3060 
3061   /* set operators */
3062   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3063   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3064 
3065   /* additional KSP customization */
3066   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3067   if (max_it < 5) {
3068     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3069   }
3070   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
3071 
3072 
3073   /* print some info if requested */
3074   if (pcbddc->dbg_flag) {
3075     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3076     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3077     if (!multilevel_allowed) {
3078       if (multilevel_requested) {
3079         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3080       } else if (pcbddc->max_levels) {
3081         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3082       }
3083     }
3084     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3085     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3086   }
3087 
3088   /* setup coarse ksp */
3089   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3090   if (pcbddc->dbg_flag) {
3091     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3092     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3093     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3094     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3095   }
3096 
3097   /* Check coarse problem if requested */
3098   if (pcbddc->dbg_flag) {
3099     KSP       check_ksp;
3100     KSPType   check_ksp_type;
3101     PC        check_pc;
3102     Vec       check_vec;
3103     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3104     PetscInt  its;
3105     PetscBool ispreonly,compute;
3106 
3107     /* Create ksp object suitable for estimation of extreme eigenvalues */
3108     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3109     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3110     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
3111     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3112     if (ispreonly) {
3113       check_ksp_type = KSPPREONLY;
3114       compute = PETSC_FALSE;
3115     } else {
3116       if (issym) check_ksp_type = KSPCG;
3117       else check_ksp_type = KSPGMRES;
3118       compute = PETSC_TRUE;
3119     }
3120     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3121     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3122     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3123     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3124     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3125     /* create random vec */
3126     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3127     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3128     if (CoarseNullSpace) {
3129       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3130     }
3131     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3132     /* solve coarse problem */
3133     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3134     if (CoarseNullSpace) {
3135       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3136     }
3137     /* check coarse problem residual error */
3138     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3139     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3140     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3141     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3142     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3143     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3144     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3145     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3146     /* get eigenvalue estimation if preonly has not been requested */
3147     if (!ispreonly) {
3148       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3149       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3150       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3151     }
3152     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3153     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3154   }
3155   /* free memory */
3156   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3157   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3158   PetscFunctionReturn(0);
3159 }
3160 
3161 #undef __FUNCT__
3162 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3163 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3164 {
3165   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3166   PC_IS*         pcis = (PC_IS*)pc->data;
3167   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3168   PetscInt       i,j,coarse_size;
3169   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3170   PetscErrorCode ierr;
3171 
3172   PetscFunctionBegin;
3173   /* get indices in local ordering for vertices and constraints */
3174   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3175   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3176   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3177   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3178   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3179   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3180   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3181 
3182   /* Compute global number of coarse dofs */
3183   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3184 
3185   /* check numbering */
3186   if (pcbddc->dbg_flag) {
3187     PetscScalar coarsesum,*array;
3188 
3189     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3190     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3191     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3192     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3193     for (i=0;i<pcbddc->local_primal_size;i++) {
3194       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3195     }
3196     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3197     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3198     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3199     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3200     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3201     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3202     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3203     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3204     for (i=0;i<pcis->n;i++) {
3205       if (array[i] == 1.0) {
3206         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3207       }
3208     }
3209     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3210     for (i=0;i<pcis->n;i++) {
3211       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3212     }
3213     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3214     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3215     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3216     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3217     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3218     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3219     if (pcbddc->dbg_flag > 1) {
3220       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3221       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3222       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3223       for (i=0;i<pcbddc->local_primal_size;i++) {
3224         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]);
3225       }
3226       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3227     }
3228     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3229   }
3230   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3231   /* get back data */
3232   *coarse_size_n = coarse_size;
3233   *local_primal_indices_n = local_primal_indices;
3234   PetscFunctionReturn(0);
3235 }
3236 
3237