xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision bbd3f3368baff017a6ea7962313571cf23b2b4ec)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local solvers ksp_D and ksp_R */
22   /* PCBDDCSetUpLocalScatters should be called first! */
23   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
24 
25   /* Change global null space passed in by the user if change of basis has been requested */
26   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
27     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
28   }
29 
30   /*
31      Setup local correction and local part of coarse basis.
32      Gives back the dense local part of the coarse matrix in column major ordering
33   */
34   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
35 
36   /* Compute total number of coarse nodes and setup coarse solver */
37   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* free */
40   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
41   PetscFunctionReturn(0);
42 }
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "PCBDDCResetCustomization"
46 PetscErrorCode PCBDDCResetCustomization(PC pc)
47 {
48   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
53   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
54   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
58   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
59   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
61   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
62   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "PCBDDCResetTopography"
68 PetscErrorCode PCBDDCResetTopography(PC pc)
69 {
70   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
96   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
98   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
101   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
105   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
107   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
108   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
109   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
110   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
111   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
112   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
118 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
119 {
120   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
121   PC_IS          *pcis = (PC_IS*)pc->data;
122   VecType        impVecType;
123   PetscInt       n_constraints,n_R,old_size;
124   PetscErrorCode ierr;
125 
126   PetscFunctionBegin;
127   if (!pcbddc->ConstraintMatrix) {
128     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
129   }
130   /* get sizes */
131   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
132   n_R = pcis->n-pcbddc->n_actual_vertices;
133   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
134   /* local work vectors (try to avoid unneeded work)*/
135   /* R nodes */
136   old_size = -1;
137   if (pcbddc->vec1_R) {
138     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
139   }
140   if (n_R != old_size) {
141     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
142     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
143     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
144     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
145     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
146     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
147   }
148   /* local primal dofs */
149   old_size = -1;
150   if (pcbddc->vec1_P) {
151     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
152   }
153   if (pcbddc->local_primal_size != old_size) {
154     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
155     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
156     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
157     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
158   }
159   /* local explicit constraints */
160   old_size = -1;
161   if (pcbddc->vec1_C) {
162     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
163   }
164   if (n_constraints && n_constraints != old_size) {
165     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
166     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
167     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
168     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "PCBDDCSetUpCorrection"
175 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
176 {
177   PetscErrorCode         ierr;
178   /* pointers to pcis and pcbddc */
179   PC_IS*                 pcis = (PC_IS*)pc->data;
180   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
181   /* submatrices of local problem */
182   Mat                    A_RV,A_VR,A_VV;
183   /* working matrices */
184   Mat                    M1,M2,M3,C_CR;
185   /* working vectors */
186   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
187   /* additional working stuff */
188   IS                     is_aux;
189   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
190   const PetscScalar      *array,*row_cmat_values;
191   const PetscInt         *row_cmat_indices,*idx_R_local;
192   PetscInt               *idx_V_B,*auxindices;
193   PetscInt               n_vertices,n_constraints,size_of_constraint;
194   PetscInt               i,j,n_R,n_D,n_B;
195   PetscBool              unsymmetric_check;
196   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
197   MatType                impMatType;
198   /* some shortcuts to scalars */
199   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
200   /* for debugging purposes */
201   PetscReal              *coarsefunctions_errors,*constraints_errors;
202 
203   PetscFunctionBegin;
204   /* get number of vertices (corners plus constraints with change of basis)
205      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
206   n_vertices = pcbddc->n_actual_vertices;
207   n_constraints = pcbddc->local_primal_size-n_vertices;
208   /* Set Non-overlapping dimensions */
209   n_B = pcis->n_B; n_D = pcis->n - n_B;
210   n_R = pcis->n-n_vertices;
211 
212   /* Set types for local objects needed by BDDC precondtioner */
213   impMatType = MATSEQDENSE;
214 
215   /* Allocating some extra storage just to be safe */
216   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
217   for (i=0;i<pcis->n;i++) auxindices[i]=i;
218 
219   /* vertices in boundary numbering */
220   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
221   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
222   if (i != n_vertices) {
223     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
224   }
225 
226   /* Precompute stuffs needed for preprocessing and application of BDDC*/
227   if (n_constraints) {
228     /* see if we can save some allocations */
229     if (pcbddc->local_auxmat2) {
230       PetscInt on_R,on_constraints;
231       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
232       if (on_R != n_R || on_constraints != n_constraints) {
233         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
234         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
235       }
236     }
237     /* work vectors */
238     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
239     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
240     /* auxiliary matrices */
241     if (!pcbddc->local_auxmat2) {
242       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
243       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
244       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
245       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
246     }
247 
248     /* Extract constraints on R nodes: C_{CR}  */
249     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
250     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
251     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
252 
253     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
254     for (i=0;i<n_constraints;i++) {
255       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
256       /* Get row of constraint matrix in R numbering */
257       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
258       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
259       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
260       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
261       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
262       /* Solve for row of constraint matrix in R numbering */
263       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
264       /* Set values in local_auxmat2 */
265       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
266       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
267       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
268     }
269     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
270     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
271     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
272 
273     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
274     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
275     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
276     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
277     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
278     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
279     ierr = MatSetUp(M1);CHKERRQ(ierr);
280     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
281     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
282     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
283     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
284     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
285     ierr = MatDestroy(&M2);CHKERRQ(ierr);
286     ierr = MatDestroy(&M3);CHKERRQ(ierr);
287     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
288     if (!pcbddc->local_auxmat1) {
289       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
290     } else {
291       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
292     }
293   }
294 
295   /* Get submatrices from subdomain matrix */
296   if (n_vertices) {
297     PetscInt ibs,mbs;
298     PetscBool issbaij;
299     Mat newmat;
300 
301     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
302     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
303     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
304     if (ibs != mbs) { /* need to convert to SEQAIJ */
305       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
306       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
307       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
308       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
309       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
310     } else {
311       /* this is safe */
312       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
313       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
314       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
315         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
316         /* which of the two approaches is faster? */
317         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
318         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
319         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
320         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
321         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
322       } else {
323         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
324         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
325       }
326     }
327     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
328     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
329     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
330   }
331 
332   /* Matrix of coarse basis functions (local) */
333   if (pcbddc->coarse_phi_B) {
334     PetscInt on_B,on_primal;
335     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
336     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
337       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
338       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
339     }
340   }
341   if (pcbddc->coarse_phi_D) {
342     PetscInt on_D,on_primal;
343     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
344     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
345       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
346       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
347     }
348   }
349   if (!pcbddc->coarse_phi_B) {
350     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
351     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
352     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
353     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
354   }
355   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
356     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
357     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
358     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
359     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
360   }
361 
362   if (pcbddc->dbg_flag) {
363     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
364     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr);
365     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr);
366   }
367   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
368   ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr);
369 
370   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
371 
372   /* vertices */
373   for (i=0;i<n_vertices;i++) {
374     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
375     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
376     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
377     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
378     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
379     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
380     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
381     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
382     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
383     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
384     if (n_constraints) {
385       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
386       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
387       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
388     }
389     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
390     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
391 
392     /* Set values in coarse basis function and subdomain part of coarse_mat */
393     /* coarse basis functions */
394     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
395     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
396     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
397     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
398     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
399     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
400     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
401     if (pcbddc->switch_static || pcbddc->dbg_flag) {
402       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
403       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
404       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
405       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
406       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
407     }
408     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
409     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
410     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
411     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
412     if (n_constraints) {
413       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
414       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
415       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
416     }
417 
418     /* check */
419     if (pcbddc->dbg_flag) {
420       /* assemble subdomain vector on local nodes */
421       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
422       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
423       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
424       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
425       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
426       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
427       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
428       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
429       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
430       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
431       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
432       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
433       if (n_constraints) {
434         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
435         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
436         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
437       }
438       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
439       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
440       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
441       /* check saddle point solution */
442       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
443       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
444       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
445       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
446       /* shift by the identity matrix */
447       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
448       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
449       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
450       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
451     }
452   }
453 
454   /* constraints */
455   for (i=0;i<n_constraints;i++) {
456     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
457     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
458     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
459     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
460     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
461     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
462     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
463     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
464     if (n_vertices) {
465       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
466     }
467     /* Set values in coarse basis function and subdomain part of coarse_mat */
468     /* coarse basis functions */
469     j = i+n_vertices; /* don't touch this! */
470     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
471     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
472     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
473     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
474     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
475     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
476     if (pcbddc->switch_static || pcbddc->dbg_flag) {
477       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
479       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
480       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
481       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
482     }
483     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
484     if (n_vertices) {
485       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
486       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
487       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
488     }
489     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
490     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
491     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
492 
493     if (pcbddc->dbg_flag) {
494       /* assemble subdomain vector on nodes */
495       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
496       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
497       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
498       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
499       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
500       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
501       /* assemble subdomain vector of lagrange multipliers */
502       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
503       if (n_vertices) {
504         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
505         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
506         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
507       }
508       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
509       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
510       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
511       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
512       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
513       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
514       /* check saddle point solution */
515       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
516       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
517       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
518       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
519       /* shift by the identity matrix */
520       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
521       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
522       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
523       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
524     }
525   }
526   /* call assembling routines for local coarse basis */
527   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
528   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
529   if (pcbddc->switch_static || pcbddc->dbg_flag) {
530     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
531     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
532   }
533 
534   /* compute other basis functions for non-symmetric problems */
535   ierr = MatIsSymmetric(pc->pmat,1.e-4,&pcbddc->issym);CHKERRQ(ierr);
536   if (!pcbddc->issym) {
537     if (!pcbddc->coarse_psi_B) {
538       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
539       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
540       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
541       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
542     }
543     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
544       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
545       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
546       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
547       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
548     }
549     for (i=0;i<pcbddc->local_primal_size;i++) {
550       if (n_constraints) {
551         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
552         for (j=0;j<n_constraints;j++) {
553           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
554         }
555         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
556         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
557       }
558       if (i<n_vertices) {
559         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
560         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
561         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
562         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
563         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
564         if (n_constraints) {
565           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
566         }
567       } else {
568         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
569       }
570       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
571       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
572       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
573       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
574       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
575       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
576       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
577       if (i<n_vertices) {
578         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
579       }
580       if (pcbddc->switch_static || pcbddc->dbg_flag) {
581         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
582         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
583         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
584         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
585         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
586       }
587 
588       if (pcbddc->dbg_flag) {
589         /* assemble subdomain vector on nodes */
590         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
591         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
592         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
593         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
594         if (i<n_vertices) {
595           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
596         }
597         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
598         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
599         /* assemble subdomain vector of lagrange multipliers */
600         for (j=0;j<pcbddc->local_primal_size;j++) {
601           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
602         }
603         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
604         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
605         /* check saddle point solution */
606         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
607         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
608         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
609         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
610         /* shift by the identity matrix */
611         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
612         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
613         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
614         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
615       }
616     }
617     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
618     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
619     if (pcbddc->switch_static || pcbddc->dbg_flag) {
620       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
621       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
622     }
623     unsymmetric_check = PETSC_TRUE;
624   } else { /* take references to already computed coarse basis */
625     unsymmetric_check = PETSC_FALSE;
626     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
627     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
628     if (pcbddc->coarse_phi_D) {
629       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
630       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
631     }
632   }
633   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
634   /* Checking coarse_sub_mat and coarse basis functios */
635   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
636   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
637   if (pcbddc->dbg_flag) {
638     Mat         coarse_sub_mat;
639     Mat         AUXMAT,TM1,TM2,TM3,TM4;
640     Mat         coarse_phi_D,coarse_phi_B;
641     Mat         coarse_psi_D,coarse_psi_B;
642     Mat         A_II,A_BB,A_IB,A_BI;
643     MatType     checkmattype=MATSEQAIJ;
644     PetscReal   real_value;
645 
646     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
647     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
648     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
649     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
650     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
651     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
652     if (unsymmetric_check) {
653       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
654       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
655     }
656     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
657 
658     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
659     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
660     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
661     if (unsymmetric_check) {
662       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
663       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
664       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
665       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
666       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
667       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
668       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
669       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
670       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
671       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
672       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
673       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
674     } else {
675       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
676       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
677       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
678       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
679       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
680       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
681       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
682       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
683     }
684     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
685     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
686     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
687     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
688     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
689     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
690     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
691     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
692     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
693     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
694     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
695     for (i=0;i<pcbddc->local_primal_size;i++) {
696       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
697     }
698     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
699     for (i=0;i<pcbddc->local_primal_size;i++) {
700       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
701     }
702     if (unsymmetric_check) {
703       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
704       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
705         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
706       }
707       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
708       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
709         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
710       }
711     }
712     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
713     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
714     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
715     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
716     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
717     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
718     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
719     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
720     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
721     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
722     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
723     if (unsymmetric_check) {
724       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
725       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
726     }
727     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
728     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
729     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
730     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
731   }
732   /* free memory */
733   if (n_vertices) {
734     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
735     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
736     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
737     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
738     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
739   }
740   if (n_constraints) {
741     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
742     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
743     ierr = MatDestroy(&M1);CHKERRQ(ierr);
744     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
745   }
746   ierr = PetscFree(auxindices);CHKERRQ(ierr);
747   /* get back data */
748   *coarse_submat_vals_n = coarse_submat_vals;
749   PetscFunctionReturn(0);
750 }
751 
752 #undef __FUNCT__
753 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
754 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
755 {
756   PC_IS*            pcis = (PC_IS*)(pc->data);
757   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
758   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
759   PetscBool         issbaij,isseqaij;
760   /* manage repeated solves */
761   MatReuse          reuse;
762   PetscErrorCode    ierr;
763 
764   PetscFunctionBegin;
765   if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) {
766     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
767   }
768   /* get mat flags */
769   reuse = MAT_INITIAL_MATRIX;
770   if (pc->setupcalled) {
771     if (pc->flag == SAME_NONZERO_PATTERN) {
772       reuse = MAT_REUSE_MATRIX;
773     } else {
774       reuse = MAT_INITIAL_MATRIX;
775     }
776   }
777   if (reuse == MAT_INITIAL_MATRIX) {
778     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
779     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
780     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
781     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
782     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
783   }
784 
785   /* transform local matrices if needed */
786   if (pcbddc->ChangeOfBasisMatrix) {
787     Mat         change_mat_all;
788     PetscScalar *row_cmat_values;
789     PetscInt    *row_cmat_indices;
790     PetscInt    *nnz,*is_indices,*temp_indices;
791     PetscInt    i,j,k,n_D,n_B;
792 
793     /* Get Non-overlapping dimensions */
794     n_B = pcis->n_B;
795     n_D = pcis->n-n_B;
796 
797     /* compute nonzero structure of change of basis on all local nodes */
798     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
799     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
800     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
801     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
802     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
803     k=1;
804     for (i=0;i<n_B;i++) {
805       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
806       nnz[is_indices[i]]=j;
807       if (k < j) k = j;
808       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
809     }
810     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
811     /* assemble change of basis matrix on the whole set of local dofs */
812     ierr = PetscMalloc1(k,&temp_indices);CHKERRQ(ierr);
813     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
814     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
815     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
816     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
817     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
818     for (i=0;i<n_D;i++) {
819       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
820     }
821     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
822     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
823     for (i=0;i<n_B;i++) {
824       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
825       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
826       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
827       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
828     }
829     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
830     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
831     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
832     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr);
833     if (isseqaij) {
834       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
835     } else {
836       Mat work_mat;
837       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
838       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
839       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
840     }
841     /*
842     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
843     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
844     */
845     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
846     ierr = PetscFree(nnz);CHKERRQ(ierr);
847     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
848   } else {
849     /* without change of basis, the local matrix is unchanged */
850     if (!pcbddc->local_mat) {
851       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
852       pcbddc->local_mat = matis->A;
853     }
854   }
855 
856   /* get submatrices */
857   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
858   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
859   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
860   if (!issbaij) {
861     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
862     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
863   } else {
864     Mat newmat;
865     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
866     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
867     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
868     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
869   }
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
875 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
876 {
877   PC_IS*         pcis = (PC_IS*)(pc->data);
878   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
879   IS             is_aux1,is_aux2;
880   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
881   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
882   PetscInt       vbs,bs;
883   PetscBT        bitmask;
884   PetscErrorCode ierr;
885 
886   PetscFunctionBegin;
887   /*
888     No need to setup local scatters if
889       - primal space is unchanged
890         AND
891       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
892         AND
893       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
894   */
895   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
896     PetscFunctionReturn(0);
897   }
898   /* destroy old objects */
899   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
900   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
901   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
902   /* Set Non-overlapping dimensions */
903   n_B = pcis->n_B; n_D = pcis->n - n_B;
904   n_vertices = pcbddc->n_actual_vertices;
905   /* create auxiliary bitmask */
906   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
907   for (i=0;i<n_vertices;i++) {
908     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
909   }
910 
911   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
912   ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr);
913   for (i=0, n_R=0; i<pcis->n; i++) {
914     if (!PetscBTLookup(bitmask,i)) {
915       idx_R_local[n_R] = i;
916       n_R++;
917     }
918   }
919 
920   /* Block code */
921   vbs = 1;
922   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
923   if (bs>1 && !(n_vertices%bs)) {
924     PetscBool is_blocked = PETSC_TRUE;
925     PetscInt  *vary;
926     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
927     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
928     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
929     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
930     for (i=0; i<n_vertices; i++) {
931       if (vary[i]!=0 && vary[i]!=bs) {
932         is_blocked = PETSC_FALSE;
933         break;
934       }
935     }
936     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
937       vbs = bs;
938       for (i=0;i<n_R/vbs;i++) {
939         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
940       }
941     }
942     ierr = PetscFree(vary);CHKERRQ(ierr);
943   }
944   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
945   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
946 
947   /* print some info if requested */
948   if (pcbddc->dbg_flag) {
949     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
950     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
951     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
952     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
953     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
954     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
955     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
956     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
957   }
958 
959   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
960   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
961   ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr);
962   ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr);
963   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
964   for (i=0; i<n_D; i++) {
965     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
966   }
967   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
968   for (i=0, j=0; i<n_R; i++) {
969     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
970       aux_array1[j++] = i;
971     }
972   }
973   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
974   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
975   for (i=0, j=0; i<n_B; i++) {
976     if (!PetscBTLookup(bitmask,is_indices[i])) {
977       aux_array2[j++] = i;
978     }
979   }
980   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
981   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
982   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
983   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
984   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
985 
986   if (pcbddc->switch_static || pcbddc->dbg_flag) {
987     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
988     for (i=0, j=0; i<n_R; i++) {
989       if (PetscBTLookup(bitmask,idx_R_local[i])) {
990         aux_array1[j++] = i;
991       }
992     }
993     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
994     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
995     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
996   }
997   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
998   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
1001 
1002 
1003 #undef __FUNCT__
1004 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1005 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
1006 {
1007   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1008   PC_IS          *pcis = (PC_IS*)pc->data;
1009   PC             pc_temp;
1010   Mat            A_RR;
1011   MatReuse       reuse;
1012   PetscScalar    m_one = -1.0;
1013   PetscReal      value;
1014   PetscInt       n_D,n_R,ibs,mbs;
1015   PetscBool      use_exact,use_exact_reduced,issbaij;
1016   PetscErrorCode ierr;
1017   /* prefixes stuff */
1018   char           dir_prefix[256],neu_prefix[256],str_level[16];
1019   size_t         len;
1020 
1021   PetscFunctionBegin;
1022 
1023   /* compute prefixes */
1024   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1025   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1026   if (!pcbddc->current_level) {
1027     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1028     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1029     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1030     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1031   } else {
1032     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1033     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1034     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1035     len -= 15; /* remove "pc_bddc_coarse_" */
1036     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1037     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1038     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1039     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1040     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1041     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1042     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1043     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1044   }
1045 
1046   /* DIRICHLET PROBLEM */
1047   /* Matrix for Dirichlet problem is pcis->A_II */
1048   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1049   if (!pcbddc->ksp_D) { /* create object if not yet build */
1050     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1051     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1052     /* default */
1053     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1054     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1055     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1056     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1057     if (issbaij) {
1058       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1059     } else {
1060       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1061     }
1062     /* Allow user's customization */
1063     ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1064     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1065   }
1066   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1067   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1068   if (!n_D) {
1069     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1070     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1071   }
1072   /* Set Up KSP for Dirichlet problem of BDDC */
1073   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1074   /* set ksp_D into pcis data */
1075   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1076   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1077   pcis->ksp_D = pcbddc->ksp_D;
1078 
1079   /* NEUMANN PROBLEM */
1080   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1081   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1082   if (pcbddc->ksp_R) { /* already created ksp */
1083     PetscInt nn_R;
1084     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1085     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1086     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1087     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1088       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1089       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1090       reuse = MAT_INITIAL_MATRIX;
1091     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1092       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1093         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1094         reuse = MAT_INITIAL_MATRIX;
1095       } else { /* safe to reuse the matrix */
1096         reuse = MAT_REUSE_MATRIX;
1097       }
1098     }
1099     /* last check */
1100     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1101       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1102       reuse = MAT_INITIAL_MATRIX;
1103     }
1104   } else { /* first time, so we need to create the matrix */
1105     reuse = MAT_INITIAL_MATRIX;
1106   }
1107   /* extract A_RR */
1108   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1109   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1110   if (ibs != mbs) {
1111     Mat newmat;
1112     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1113     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1114     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1115   } else {
1116     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1117   }
1118   if (!pcbddc->ksp_R) { /* create object if not present */
1119     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1120     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1121     /* default */
1122     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1123     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1124     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1125     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1126     if (issbaij) {
1127       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1128     } else {
1129       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1130     }
1131     /* Allow user's customization */
1132     ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1133     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1134   }
1135   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1136   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1137   if (!n_R) {
1138     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1139     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1140   }
1141   /* Set Up KSP for Neumann problem of BDDC */
1142   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1143 
1144   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1145   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1146     /* Dirichlet */
1147     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1148     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1149     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1150     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1151     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1152     /* need to be adapted? */
1153     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1154     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1155     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1156     /* print info */
1157     if (pcbddc->dbg_flag) {
1158       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1159       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1160       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1161       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1162       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1163       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1164     }
1165     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1166       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1167     }
1168 
1169     /* Neumann */
1170     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1171     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1172     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1173     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1174     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1175     /* need to be adapted? */
1176     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1177     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1178     /* print info */
1179     if (pcbddc->dbg_flag) {
1180       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1181       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1182     }
1183     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1184       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1185     }
1186   }
1187   /* free Neumann problem's matrix */
1188   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1194 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1195 {
1196   PetscErrorCode ierr;
1197   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1198 
1199   PetscFunctionBegin;
1200   if (applytranspose) {
1201     if (pcbddc->local_auxmat1) {
1202       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1203       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1204     }
1205     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1206   } else {
1207     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1208     if (pcbddc->local_auxmat1) {
1209       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1210       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1211     }
1212   }
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1217 #undef __FUNCT__
1218 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1219 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1220 {
1221   PetscErrorCode ierr;
1222   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1223   PC_IS*            pcis = (PC_IS*)  (pc->data);
1224   const PetscScalar zero = 0.0;
1225 
1226   PetscFunctionBegin;
1227   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1228   if (applytranspose) {
1229     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1230     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1231   } else {
1232     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1233     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1234   }
1235   /* start communications from local primal nodes to rhs of coarse solver */
1236   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1237   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1238   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1239 
1240   /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */
1241   /* TODO remove null space when doing multilevel */
1242   if (pcbddc->coarse_ksp) {
1243     if (applytranspose) {
1244       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1245     } else {
1246       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1247     }
1248   }
1249   /* start communications from coarse solver solution to local primal nodes */
1250   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1251 
1252   /* Local solution on R nodes */
1253   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1254   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1255   ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1256   if (pcbddc->switch_static) {
1257     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1258     ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1259   }
1260   ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1261   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1262   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1263   ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1264   if (pcbddc->switch_static) {
1265     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1266     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267   }
1268 
1269   /* complete communications from coarse sol to local primal nodes */
1270   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1271 
1272   /* Sum contributions from two levels */
1273   if (applytranspose) {
1274     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1275     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1276   } else {
1277     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1278     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1279   }
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1284 #undef __FUNCT__
1285 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1286 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1287 {
1288   PetscErrorCode ierr;
1289   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1290   PetscScalar    *array,*array2;
1291   Vec            from,to;
1292 
1293   PetscFunctionBegin;
1294   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1295     from = pcbddc->coarse_vec;
1296     to = pcbddc->vec1_P;
1297     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1298       Vec tvec;
1299       PetscInt lsize;
1300       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1301       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1302       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1303       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1304       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1305       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1306       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1307     }
1308   } else { /* from local to global -> put data in coarse right hand side */
1309     from = pcbddc->vec1_P;
1310     to = pcbddc->coarse_vec;
1311   }
1312   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 #undef __FUNCT__
1317 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1318 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1319 {
1320   PetscErrorCode ierr;
1321   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1322   PetscScalar    *array,*array2;
1323   Vec            from,to;
1324 
1325   PetscFunctionBegin;
1326   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1327     from = pcbddc->coarse_vec;
1328     to = pcbddc->vec1_P;
1329   } else { /* from local to global -> put data in coarse right hand side */
1330     from = pcbddc->vec1_P;
1331     to = pcbddc->coarse_vec;
1332   }
1333   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1334   if (smode == SCATTER_FORWARD) {
1335     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1336       Vec tvec;
1337       PetscInt lsize;
1338       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1339       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1340       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1341       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1342       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1343       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1344       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1345     }
1346   }
1347   PetscFunctionReturn(0);
1348 }
1349 
1350 /* uncomment for testing purposes */
1351 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1352 #undef __FUNCT__
1353 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1354 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1355 {
1356   PetscErrorCode    ierr;
1357   PC_IS*            pcis = (PC_IS*)(pc->data);
1358   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1359   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1360   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1361   MatType           impMatType=MATSEQAIJ;
1362   /* one and zero */
1363   PetscScalar       one=1.0,zero=0.0;
1364   /* space to store constraints and their local indices */
1365   PetscScalar       *temp_quadrature_constraint;
1366   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1367   /* iterators */
1368   PetscInt          i,j,k,total_counts,temp_start_ptr;
1369   /* stuff to store connected components stored in pcbddc->mat_graph */
1370   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1371   PetscInt          n_ISForFaces,n_ISForEdges;
1372   /* near null space stuff */
1373   MatNullSpace      nearnullsp;
1374   const Vec         *nearnullvecs;
1375   Vec               *localnearnullsp;
1376   PetscBool         nnsp_has_cnst;
1377   PetscInt          nnsp_size;
1378   PetscScalar       *array;
1379   /* BLAS integers */
1380   PetscBLASInt      lwork,lierr;
1381   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1382   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1383   /* LAPACK working arrays for SVD or POD */
1384   PetscBool         skip_lapack;
1385   PetscScalar       *work;
1386   PetscReal         *singular_vals;
1387 #if defined(PETSC_USE_COMPLEX)
1388   PetscReal         *rwork;
1389 #endif
1390 #if defined(PETSC_MISSING_LAPACK_GESVD)
1391   PetscBLASInt      Blas_one_2=1;
1392   PetscScalar       *temp_basis,*correlation_mat;
1393 #else
1394   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1395   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1396 #endif
1397   /* reuse */
1398   PetscInt          olocal_primal_size;
1399   PetscInt          *oprimal_indices_local_idxs;
1400   /* change of basis */
1401   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1402   PetscBool         boolforchange,qr_needed;
1403   PetscBT           touched,change_basis,qr_needed_idx;
1404   /* auxiliary stuff */
1405   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1406   /* some quantities */
1407   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1408   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1409 
1410 
1411   PetscFunctionBegin;
1412   /* Destroy Mat objects computed previously */
1413   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1414   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1415   /* Get index sets for faces, edges and vertices from graph */
1416   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1417     pcbddc->use_vertices = PETSC_TRUE;
1418   }
1419   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1420   /* HACKS (the two following code branches) */
1421   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1422     pcbddc->use_change_of_basis = PETSC_TRUE;
1423     pcbddc->use_change_on_faces = PETSC_FALSE;
1424   }
1425   if (pcbddc->NullSpace) {
1426     /* use_change_of_basis should be consistent among processors */
1427     PetscBool tbool = pcbddc->use_change_of_basis;
1428     ierr = MPI_Allreduce(&tbool,&(pcbddc->use_change_of_basis),1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1429   }
1430   /* print some info */
1431   if (pcbddc->dbg_flag) {
1432     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1433     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1434     i = 0;
1435     if (ISForVertices) {
1436       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1437     }
1438     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1439     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1440     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1441     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1442   }
1443   /* check if near null space is attached to global mat */
1444   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1445   if (nearnullsp) {
1446     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1447     /* remove any stored info */
1448     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1449     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1450     /* store information for BDDC solver reuse */
1451     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1452     pcbddc->onearnullspace = nearnullsp;
1453     ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1454     for (i=0;i<nnsp_size;i++) {
1455       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1456     }
1457   } else { /* if near null space is not provided BDDC uses constants by default */
1458     nnsp_size = 0;
1459     nnsp_has_cnst = PETSC_TRUE;
1460   }
1461   /* get max number of constraints on a single cc */
1462   max_constraints = nnsp_size;
1463   if (nnsp_has_cnst) max_constraints++;
1464 
1465   /*
1466        Evaluate maximum storage size needed by the procedure
1467        - temp_indices will contain start index of each constraint stored as follows
1468        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1469        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1470        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1471                                                                                                                                                          */
1472   total_counts = n_ISForFaces+n_ISForEdges;
1473   total_counts *= max_constraints;
1474   n_vertices = 0;
1475   if (ISForVertices) {
1476     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1477   }
1478   total_counts += n_vertices;
1479   ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr);
1480   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1481   total_counts = 0;
1482   max_size_of_constraint = 0;
1483   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1484     if (i<n_ISForEdges) {
1485       used_IS = &ISForEdges[i];
1486     } else {
1487       used_IS = &ISForFaces[i-n_ISForEdges];
1488     }
1489     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1490     total_counts += j;
1491     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1492   }
1493   total_counts *= max_constraints;
1494   total_counts += n_vertices;
1495   ierr = PetscMalloc1(total_counts,&temp_quadrature_constraint);CHKERRQ(ierr);
1496   ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint);CHKERRQ(ierr);
1497   ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1498   /* get local part of global near null space vectors */
1499   ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1500   for (k=0;k<nnsp_size;k++) {
1501     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1502     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1504   }
1505 
1506   /* whether or not to skip lapack calls */
1507   skip_lapack = PETSC_TRUE;
1508   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1509 
1510   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1511   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1512     PetscScalar temp_work;
1513 #if defined(PETSC_MISSING_LAPACK_GESVD)
1514     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1515     ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1516     ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1517     ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1518 #if defined(PETSC_USE_COMPLEX)
1519     ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1520 #endif
1521     /* now we evaluate the optimal workspace using query with lwork=-1 */
1522     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1523     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1524     lwork = -1;
1525     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1526 #if !defined(PETSC_USE_COMPLEX)
1527     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1528 #else
1529     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1530 #endif
1531     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1532     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1533 #else /* on missing GESVD */
1534     /* SVD */
1535     PetscInt max_n,min_n;
1536     max_n = max_size_of_constraint;
1537     min_n = max_constraints;
1538     if (max_size_of_constraint < max_constraints) {
1539       min_n = max_size_of_constraint;
1540       max_n = max_constraints;
1541     }
1542     ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1543 #if defined(PETSC_USE_COMPLEX)
1544     ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1545 #endif
1546     /* now we evaluate the optimal workspace using query with lwork=-1 */
1547     lwork = -1;
1548     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1549     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1550     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1551     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1552 #if !defined(PETSC_USE_COMPLEX)
1553     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1554 #else
1555     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1556 #endif
1557     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1558     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1559 #endif /* on missing GESVD */
1560     /* Allocate optimal workspace */
1561     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1562     ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr);
1563   }
1564   /* Now we can loop on constraining sets */
1565   total_counts = 0;
1566   temp_indices[0] = 0;
1567   /* vertices */
1568   if (ISForVertices) {
1569     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1570     if (nnsp_has_cnst) { /* consider all vertices */
1571       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1572       for (i=0;i<n_vertices;i++) {
1573         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1574         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1575         total_counts++;
1576       }
1577     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1578       PetscBool used_vertex;
1579       for (i=0;i<n_vertices;i++) {
1580         used_vertex = PETSC_FALSE;
1581         k = 0;
1582         while (!used_vertex && k<nnsp_size) {
1583           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1584           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1585             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1586             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1587             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1588             total_counts++;
1589             used_vertex = PETSC_TRUE;
1590           }
1591           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1592           k++;
1593         }
1594       }
1595     }
1596     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1597     n_vertices = total_counts;
1598   }
1599 
1600   /* edges and faces */
1601   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1602     if (i<n_ISForEdges) {
1603       used_IS = &ISForEdges[i];
1604       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1605     } else {
1606       used_IS = &ISForFaces[i-n_ISForEdges];
1607       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1608     }
1609     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1610     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1611     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1612     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1613     /* change of basis should not be performed on local periodic nodes */
1614     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1615     if (nnsp_has_cnst) {
1616       PetscScalar quad_value;
1617       temp_constraints++;
1618       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1619       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1620       for (j=0;j<size_of_constraint;j++) {
1621         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1622       }
1623       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1624       total_counts++;
1625     }
1626     for (k=0;k<nnsp_size;k++) {
1627       PetscReal real_value;
1628       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1629       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1630       for (j=0;j<size_of_constraint;j++) {
1631         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1632       }
1633       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1634       /* check if array is null on the connected component */
1635       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1636       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1637       if (real_value > 0.0) { /* keep indices and values */
1638         temp_constraints++;
1639         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1640         total_counts++;
1641       }
1642     }
1643     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1644     valid_constraints = temp_constraints;
1645     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1646     if (!pcbddc->use_nnsp_true && temp_constraints) {
1647       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1648 
1649 #if defined(PETSC_MISSING_LAPACK_GESVD)
1650       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1651          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1652          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1653             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1654             from that computed using LAPACKgesvd
1655          -> This is due to a different computation of eigenvectors in LAPACKheev
1656          -> The quality of the POD-computed basis will be the same */
1657       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1658       /* Store upper triangular part of correlation matrix */
1659       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1660       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1661       for (j=0;j<temp_constraints;j++) {
1662         for (k=0;k<j+1;k++) {
1663           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1664         }
1665       }
1666       /* compute eigenvalues and eigenvectors of correlation matrix */
1667       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1668       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1669 #if !defined(PETSC_USE_COMPLEX)
1670       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1671 #else
1672       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1673 #endif
1674       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1675       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1676       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1677       j=0;
1678       while (j < temp_constraints && singular_vals[j] < tol) j++;
1679       total_counts=total_counts-j;
1680       valid_constraints = temp_constraints-j;
1681       /* scale and copy POD basis into used quadrature memory */
1682       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1683       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1684       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1685       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1686       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1687       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1688       if (j<temp_constraints) {
1689         PetscInt ii;
1690         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1691         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1692         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1693         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1694         for (k=0;k<temp_constraints-j;k++) {
1695           for (ii=0;ii<size_of_constraint;ii++) {
1696             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1697           }
1698         }
1699       }
1700 #else  /* on missing GESVD */
1701       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1702       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1703       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1704       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1705 #if !defined(PETSC_USE_COMPLEX)
1706       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1707 #else
1708       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1709 #endif
1710       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1711       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1712       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1713       k = temp_constraints;
1714       if (k > size_of_constraint) k = size_of_constraint;
1715       j = 0;
1716       while (j < k && singular_vals[k-j-1] < tol) j++;
1717       total_counts = total_counts-temp_constraints+k-j;
1718       valid_constraints = k-j;
1719 #endif /* on missing GESVD */
1720     }
1721     /* setting change_of_basis flag is safe now */
1722     if (boolforchange) {
1723       for (j=0;j<valid_constraints;j++) {
1724         PetscBTSet(change_basis,total_counts-j-1);
1725       }
1726     }
1727   }
1728   /* free index sets of faces, edges and vertices */
1729   for (i=0;i<n_ISForFaces;i++) {
1730     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1731   }
1732   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1733   for (i=0;i<n_ISForEdges;i++) {
1734     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1735   }
1736   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1737   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1738   /* map temp_indices_to_constraint in boundary numbering */
1739   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1740   if (i != temp_indices[total_counts]) {
1741     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1742   }
1743 
1744   /* free workspace */
1745   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1746     ierr = PetscFree(work);CHKERRQ(ierr);
1747 #if defined(PETSC_USE_COMPLEX)
1748     ierr = PetscFree(rwork);CHKERRQ(ierr);
1749 #endif
1750     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1751 #if defined(PETSC_MISSING_LAPACK_GESVD)
1752     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1753     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1754 #endif
1755   }
1756   for (k=0;k<nnsp_size;k++) {
1757     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1758   }
1759   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1760 
1761   /* set quantities in pcbddc data structure and store previous primal size */
1762   /* n_vertices defines the number of subdomain corners in the primal space */
1763   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1764   olocal_primal_size = pcbddc->local_primal_size;
1765   pcbddc->local_primal_size = total_counts;
1766   pcbddc->n_vertices = n_vertices;
1767   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1768 
1769   /* Create constraint matrix */
1770   /* The constraint matrix is used to compute the l2g map of primal dofs */
1771   /* so we need to set it up properly either with or without change of basis */
1772   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1773   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1774   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1775   /* array to compute a local numbering of constraints : vertices first then constraints */
1776   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
1777   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1778   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1779   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
1780   /* auxiliary stuff for basis change */
1781   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
1782   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1783 
1784   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1785   total_primal_vertices=0;
1786   for (i=0;i<pcbddc->local_primal_size;i++) {
1787     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1788     if (size_of_constraint == 1) {
1789       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1790       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1791       aux_primal_minloc[total_primal_vertices]=0;
1792       total_primal_vertices++;
1793     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1794       PetscInt min_loc,min_index;
1795       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1796       /* find first untouched local node */
1797       k = 0;
1798       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1799       min_index = global_indices[k];
1800       min_loc = k;
1801       /* search the minimum among global nodes already untouched on the cc */
1802       for (k=1;k<size_of_constraint;k++) {
1803         /* there can be more than one constraint on a single connected component */
1804         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1805           min_index = global_indices[k];
1806           min_loc = k;
1807         }
1808       }
1809       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1810       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1811       aux_primal_minloc[total_primal_vertices]=min_loc;
1812       total_primal_vertices++;
1813     }
1814   }
1815   /* determine if a QR strategy is needed for change of basis */
1816   qr_needed = PETSC_FALSE;
1817   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1818   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1819     if (PetscBTLookup(change_basis,i)) {
1820       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1821       j = 0;
1822       for (k=0;k<size_of_constraint;k++) {
1823         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1824           j++;
1825         }
1826       }
1827       /* found more than one primal dof on the cc */
1828       if (j > 1) {
1829         PetscBTSet(qr_needed_idx,i);
1830         qr_needed = PETSC_TRUE;
1831       }
1832     }
1833   }
1834   /* free workspace */
1835   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1836 
1837   /* permute indices in order to have a sorted set of vertices */
1838   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1839 
1840   /* nonzero structure of constraint matrix */
1841   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
1842   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1843   j=total_primal_vertices;
1844   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1845     if (!PetscBTLookup(change_basis,i)) {
1846       nnz[j]=temp_indices[i+1]-temp_indices[i];
1847       j++;
1848     }
1849   }
1850   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1851   ierr = PetscFree(nnz);CHKERRQ(ierr);
1852   /* set values in constraint matrix */
1853   for (i=0;i<total_primal_vertices;i++) {
1854     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1855   }
1856   total_counts = total_primal_vertices;
1857   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1858     if (!PetscBTLookup(change_basis,i)) {
1859       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1860       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1861       total_counts++;
1862     }
1863   }
1864   /* assembling */
1865   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1866   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1867   /*
1868   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1869   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1870   */
1871   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1872   if (pcbddc->use_change_of_basis) {
1873     /* dual and primal dofs on a single cc */
1874     PetscInt     dual_dofs,primal_dofs;
1875     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1876     PetscInt     primal_counter;
1877     /* working stuff for GEQRF */
1878     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
1879     PetscBLASInt lqr_work;
1880     /* working stuff for UNGQR */
1881     PetscScalar  *gqr_work,lgqr_work_t;
1882     PetscBLASInt lgqr_work;
1883     /* working stuff for TRTRS */
1884     PetscScalar  *trs_rhs;
1885     PetscBLASInt Blas_NRHS;
1886     /* pointers for values insertion into change of basis matrix */
1887     PetscInt     *start_rows,*start_cols;
1888     PetscScalar  *start_vals;
1889     /* working stuff for values insertion */
1890     PetscBT      is_primal;
1891 
1892     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1893     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1894     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1895     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1896     /* work arrays */
1897     ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr);
1898     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1899     /* nonzeros per row */
1900     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1901       if (PetscBTLookup(change_basis,i)) {
1902         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1903         if (PetscBTLookup(qr_needed_idx,i)) {
1904           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1905         } else {
1906           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1907           /* get local primal index on the cc */
1908           j = 0;
1909           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1910           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1911         }
1912       }
1913     }
1914     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1915     ierr = PetscFree(nnz);CHKERRQ(ierr);
1916     /* Set initial identity in the matrix */
1917     for (i=0;i<pcis->n_B;i++) {
1918       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1919     }
1920 
1921     if (pcbddc->dbg_flag) {
1922       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1923       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1924     }
1925 
1926 
1927     /* Now we loop on the constraints which need a change of basis */
1928     /*
1929        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1930        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1931 
1932        Basic blocks of change of basis matrix T computed
1933 
1934           - Using the following block transformation if there is only a primal dof on the cc
1935             (in the example, primal dof is the last one of the edge in LOCAL ordering
1936              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1937             | 1        0   ...        0     1 |
1938             | 0        1   ...        0     1 |
1939             |              ...                |
1940             | 0        ...            1     1 |
1941             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1942 
1943           - via QR decomposition of constraints otherwise
1944     */
1945     if (qr_needed) {
1946       /* space to store Q */
1947       ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr);
1948       /* first we issue queries for optimal work */
1949       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1950       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1951       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1952       lqr_work = -1;
1953       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1954       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1955       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1956       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
1957       lgqr_work = -1;
1958       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1959       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1960       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1961       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1962       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1963       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1964       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1965       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1966       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
1967       /* array to store scaling factors for reflectors */
1968       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
1969       /* array to store rhs and solution of triangular solver */
1970       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
1971       /* allocating workspace for check */
1972       if (pcbddc->dbg_flag) {
1973         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
1974       }
1975     }
1976     /* array to store whether a node is primal or not */
1977     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1978     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
1979     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
1980     if (i != total_primal_vertices) {
1981       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
1982     }
1983     for (i=0;i<total_primal_vertices;i++) {
1984       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
1985     }
1986     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
1987 
1988     /* loop on constraints and see whether or not they need a change of basis and compute it */
1989     /* -> using implicit ordering contained in temp_indices data */
1990     total_counts = pcbddc->n_vertices;
1991     primal_counter = total_counts;
1992     while (total_counts<pcbddc->local_primal_size) {
1993       primal_dofs = 1;
1994       if (PetscBTLookup(change_basis,total_counts)) {
1995         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1996         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1997           primal_dofs++;
1998         }
1999         /* get constraint info */
2000         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2001         dual_dofs = size_of_constraint-primal_dofs;
2002 
2003         if (pcbddc->dbg_flag) {
2004           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2005         }
2006 
2007         if (primal_dofs > 1) { /* QR */
2008 
2009           /* copy quadrature constraints for change of basis check */
2010           if (pcbddc->dbg_flag) {
2011             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2012           }
2013           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2014           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2015 
2016           /* compute QR decomposition of constraints */
2017           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2018           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2019           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2020           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2021           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2022           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2023           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2024 
2025           /* explictly compute R^-T */
2026           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2027           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2028           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2029           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2030           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2031           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2032           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2033           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2034           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2035           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2036 
2037           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2038           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2039           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2040           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2041           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2042           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2043           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2044           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2045           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2046 
2047           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2048              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2049              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2050           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2051           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2052           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2053           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2054           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2055           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2056           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2057           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2058           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2059           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2060 
2061           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2062           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2063           /* insert cols for primal dofs */
2064           for (j=0;j<primal_dofs;j++) {
2065             start_vals = &qr_basis[j*size_of_constraint];
2066             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2067             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2068           }
2069           /* insert cols for dual dofs */
2070           for (j=0,k=0;j<dual_dofs;k++) {
2071             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2072               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2073               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2074               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2075               j++;
2076             }
2077           }
2078 
2079           /* check change of basis */
2080           if (pcbddc->dbg_flag) {
2081             PetscInt   ii,jj;
2082             PetscBool valid_qr=PETSC_TRUE;
2083             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2084             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2085             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2086             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2087             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2088             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2089             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2090             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2091             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2092             for (jj=0;jj<size_of_constraint;jj++) {
2093               for (ii=0;ii<primal_dofs;ii++) {
2094                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2095                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2096               }
2097             }
2098             if (!valid_qr) {
2099               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2100               for (jj=0;jj<size_of_constraint;jj++) {
2101                 for (ii=0;ii<primal_dofs;ii++) {
2102                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2103                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2104                   }
2105                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2106                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2107                   }
2108                 }
2109               }
2110             } else {
2111               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2112             }
2113           }
2114         } else { /* simple transformation block */
2115           PetscInt row,col;
2116           PetscScalar val;
2117           for (j=0;j<size_of_constraint;j++) {
2118             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2119             if (!PetscBTLookup(is_primal,row)) {
2120               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2121               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2122               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
2123             } else {
2124               for (k=0;k<size_of_constraint;k++) {
2125                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2126                 if (row != col) {
2127                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2128                 } else {
2129                   val = 1.0;
2130                 }
2131                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2132               }
2133             }
2134           }
2135           if (pcbddc->dbg_flag) {
2136             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2137           }
2138         }
2139         /* increment primal counter */
2140         primal_counter += primal_dofs;
2141       } else {
2142         if (pcbddc->dbg_flag) {
2143           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2144         }
2145       }
2146       /* increment constraint counter total_counts */
2147       total_counts += primal_dofs;
2148     }
2149 
2150     /* free workspace */
2151     if (qr_needed) {
2152       if (pcbddc->dbg_flag) {
2153         ierr = PetscFree(work);CHKERRQ(ierr);
2154       }
2155       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2156       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2157       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2158       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2159       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2160     }
2161     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2162     /* assembling */
2163     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2164     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2165     /*
2166     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2167     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2168     */
2169   }
2170   /* Change of basis as provided by the user in local numbering (internal and boundary) or boundary only */
2171   if (pcbddc->user_ChangeOfBasisMatrix) {
2172     PetscInt rows,cols;
2173     ierr = MatGetSize(pcbddc->user_ChangeOfBasisMatrix,&rows,&cols);CHKERRQ(ierr);
2174     if (rows == pcis->n && cols == pcis->n) {
2175       ierr = MatGetSubMatrix(pcbddc->user_ChangeOfBasisMatrix,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2176     } else {
2177       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2178       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2179     }
2180   }
2181 
2182   /* get indices in local ordering for vertices and constraints */
2183   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2184     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2185     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2186   }
2187   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2188   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2189   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2190   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2191   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2192   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2193   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2194   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2195   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2196   /* set quantities in PCBDDC data struct */
2197   pcbddc->n_actual_vertices = i;
2198   /* check if a new primal space has been introduced */
2199   pcbddc->new_primal_space_local = PETSC_TRUE;
2200   if (olocal_primal_size == pcbddc->local_primal_size) {
2201     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2202     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2203     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2204   }
2205   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2206   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2207 
2208   /* flush dbg viewer */
2209   if (pcbddc->dbg_flag) {
2210     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2211   }
2212 
2213   /* free workspace */
2214   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2215   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2216   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2217   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2218   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2219   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2220   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2221   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2222   PetscFunctionReturn(0);
2223 }
2224 
2225 #undef __FUNCT__
2226 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2227 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2228 {
2229   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2230   PC_IS       *pcis = (PC_IS*)pc->data;
2231   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2232   PetscInt    ierr,i,vertex_size;
2233   PetscViewer viewer=pcbddc->dbg_viewer;
2234 
2235   PetscFunctionBegin;
2236   /* Reset previously computed graph */
2237   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2238   /* Init local Graph struct */
2239   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2240 
2241   /* Check validity of the csr graph passed in by the user */
2242   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2243     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2244   }
2245 
2246   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2247   if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) {
2248     Mat mat_adj;
2249     const PetscInt *xadj,*adjncy;
2250     PetscBool flg_row=PETSC_TRUE;
2251 
2252     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2253     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2254     if (!flg_row) {
2255       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2256     }
2257     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2258     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2259     if (!flg_row) {
2260       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2261     }
2262     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2263   }
2264 
2265   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2266   vertex_size = 1;
2267   if (pcbddc->user_provided_isfordofs) {
2268     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2269       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2270       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2271         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2272         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2273       }
2274       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2275       pcbddc->n_ISForDofs = 0;
2276       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2277     }
2278     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2279     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2280   } else {
2281     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2282       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2283       ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2284       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2285         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2286       }
2287     }
2288   }
2289 
2290   /* Setup of Graph */
2291   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2292     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2293   }
2294   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2295     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2296   }
2297   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2298 
2299   /* Graph's connected components analysis */
2300   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2301 
2302   /* print some info to stdout */
2303   if (pcbddc->dbg_flag) {
2304     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2305   }
2306 
2307   /* mark topography has done */
2308   pcbddc->recompute_topography = PETSC_FALSE;
2309   PetscFunctionReturn(0);
2310 }
2311 
2312 #undef __FUNCT__
2313 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2314 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2315 {
2316   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2317   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2318   PetscErrorCode ierr;
2319 
2320   PetscFunctionBegin;
2321   n = 0;
2322   vertices = 0;
2323   if (pcbddc->ConstraintMatrix) {
2324     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2325     for (i=0;i<local_primal_size;i++) {
2326       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2327       if (size_of_constraint == 1) n++;
2328       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2329     }
2330     if (vertices_idx) {
2331       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2332       n = 0;
2333       for (i=0;i<local_primal_size;i++) {
2334         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2335         if (size_of_constraint == 1) {
2336           vertices[n++]=row_cmat_indices[0];
2337         }
2338         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2339       }
2340     }
2341   }
2342   *n_vertices = n;
2343   if (vertices_idx) *vertices_idx = vertices;
2344   PetscFunctionReturn(0);
2345 }
2346 
2347 #undef __FUNCT__
2348 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2349 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2350 {
2351   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2352   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2353   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2354   PetscBT        touched;
2355   PetscErrorCode ierr;
2356 
2357     /* This function assumes that the number of local constraints per connected component
2358        is not greater than the number of nodes defined for the connected component
2359        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2360   PetscFunctionBegin;
2361   n = 0;
2362   constraints_index = 0;
2363   if (pcbddc->ConstraintMatrix) {
2364     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2365     max_size_of_constraint = 0;
2366     for (i=0;i<local_primal_size;i++) {
2367       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2368       if (size_of_constraint > 1) {
2369         n++;
2370       }
2371       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2372       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2373     }
2374     if (constraints_idx) {
2375       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
2376       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
2377       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2378       n = 0;
2379       for (i=0;i<local_primal_size;i++) {
2380         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2381         if (size_of_constraint > 1) {
2382           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2383           /* find first untouched local node */
2384           j = 0;
2385           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2386           min_index = row_cmat_global_indices[j];
2387           min_loc = j;
2388           /* search the minimum among nodes not yet touched on the connected component
2389              since there can be more than one constraint on a single cc */
2390           for (j=1;j<size_of_constraint;j++) {
2391             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2392               min_index = row_cmat_global_indices[j];
2393               min_loc = j;
2394             }
2395           }
2396           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2397           constraints_index[n++] = row_cmat_indices[min_loc];
2398         }
2399         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2400       }
2401       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2402       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2403     }
2404   }
2405   *n_constraints = n;
2406   if (constraints_idx) *constraints_idx = constraints_index;
2407   PetscFunctionReturn(0);
2408 }
2409 
2410 /* the next two functions has been adapted from pcis.c */
2411 #undef __FUNCT__
2412 #define __FUNCT__ "PCBDDCApplySchur"
2413 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2414 {
2415   PetscErrorCode ierr;
2416   PC_IS          *pcis = (PC_IS*)(pc->data);
2417 
2418   PetscFunctionBegin;
2419   if (!vec2_B) { vec2_B = v; }
2420   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2421   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2422   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2423   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2424   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 #undef __FUNCT__
2429 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2430 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2431 {
2432   PetscErrorCode ierr;
2433   PC_IS          *pcis = (PC_IS*)(pc->data);
2434 
2435   PetscFunctionBegin;
2436   if (!vec2_B) { vec2_B = v; }
2437   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2438   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2439   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2440   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2441   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2442   PetscFunctionReturn(0);
2443 }
2444 
2445 #undef __FUNCT__
2446 #define __FUNCT__ "PCBDDCSubsetNumbering"
2447 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2448 {
2449   Vec            local_vec,global_vec;
2450   IS             seqis,paris;
2451   VecScatter     scatter_ctx;
2452   PetscScalar    *array;
2453   PetscInt       *temp_global_dofs;
2454   PetscScalar    globalsum;
2455   PetscInt       i,j,s;
2456   PetscInt       nlocals,first_index,old_index,max_local;
2457   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2458   PetscMPIInt    *dof_sizes,*dof_displs;
2459   PetscBool      first_found;
2460   PetscErrorCode ierr;
2461 
2462   PetscFunctionBegin;
2463   /* mpi buffers */
2464   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
2465   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
2466   j = ( !rank_prec_comm ? size_prec_comm : 0);
2467   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
2468   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
2469   /* get maximum size of subset */
2470   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
2471   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2472   max_local = 0;
2473   for (i=0;i<n_local_dofs;i++) {
2474     if (max_local < temp_global_dofs[i] ) {
2475       max_local = temp_global_dofs[i];
2476     }
2477   }
2478   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
2479   max_global++;
2480   max_local = 0;
2481   for (i=0;i<n_local_dofs;i++) {
2482     if (max_local < local_dofs[i] ) {
2483       max_local = local_dofs[i];
2484     }
2485   }
2486   max_local++;
2487   /* allocate workspace */
2488   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2489   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2490   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2491   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2492   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2493   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2494   /* create scatter */
2495   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2496   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2497   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2498   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2499   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2500   /* init array */
2501   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2502   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2503   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2504   if (local_dofs_mult) {
2505     for (i=0;i<n_local_dofs;i++) {
2506       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2507     }
2508   } else {
2509     for (i=0;i<n_local_dofs;i++) {
2510       array[local_dofs[i]]=1.0;
2511     }
2512   }
2513   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2514   /* scatter into global vec and get total number of global dofs */
2515   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2516   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2517   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2518   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2519   /* Fill global_vec with cumulative function for global numbering */
2520   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2521   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2522   nlocals = 0;
2523   first_index = -1;
2524   first_found = PETSC_FALSE;
2525   for (i=0;i<s;i++) {
2526     if (!first_found && PetscRealPart(array[i]) > 0.1) {
2527       first_found = PETSC_TRUE;
2528       first_index = i;
2529     }
2530     nlocals += (PetscInt)PetscRealPart(array[i]);
2531   }
2532   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2533   if (!rank_prec_comm) {
2534     dof_displs[0]=0;
2535     for (i=1;i<size_prec_comm;i++) {
2536       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2537     }
2538   }
2539   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2540   if (first_found) {
2541     array[first_index] += (PetscScalar)nlocals;
2542     old_index = first_index;
2543     for (i=first_index+1;i<s;i++) {
2544       if (PetscRealPart(array[i]) > 0.1) {
2545         array[i] += array[old_index];
2546         old_index = i;
2547       }
2548     }
2549   }
2550   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2551   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2552   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2553   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2554   /* get global ordering of local dofs */
2555   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2556   if (local_dofs_mult) {
2557     for (i=0;i<n_local_dofs;i++) {
2558       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2559     }
2560   } else {
2561     for (i=0;i<n_local_dofs;i++) {
2562       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2563     }
2564   }
2565   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2566   /* free workspace */
2567   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2568   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2569   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2570   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2571   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2572   /* return pointer to global ordering of local dofs */
2573   *global_numbering_subset = temp_global_dofs;
2574   PetscFunctionReturn(0);
2575 }
2576 
2577 #undef __FUNCT__
2578 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2579 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2580 {
2581   PetscInt       i,j;
2582   PetscScalar    *alphas;
2583   PetscErrorCode ierr;
2584 
2585   PetscFunctionBegin;
2586   /* this implements stabilized Gram-Schmidt */
2587   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
2588   for (i=0;i<n;i++) {
2589     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2590     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2591     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2592   }
2593   ierr = PetscFree(alphas);CHKERRQ(ierr);
2594   PetscFunctionReturn(0);
2595 }
2596 
2597 /* TODO
2598    - now preallocation is done assuming SEQDENSE local matrices
2599 */
2600 #undef __FUNCT__
2601 #define __FUNCT__ "MatISGetMPIXAIJ"
2602 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M)
2603 {
2604   Mat                    new_mat;
2605   Mat_IS                 *matis = (Mat_IS*)(mat->data);
2606   /* info on mat */
2607   /* ISLocalToGlobalMapping rmapping,cmapping; */
2608   PetscInt               bs,rows,cols;
2609   PetscInt               lrows,lcols;
2610   PetscInt               local_rows,local_cols;
2611   PetscBool              isdense;
2612   /* values insertion */
2613   PetscScalar            *array;
2614   PetscInt               *local_indices,*global_indices;
2615   /* work */
2616   PetscInt               i,j,index_row;
2617   PetscErrorCode         ierr;
2618 
2619   PetscFunctionBegin;
2620   /* MISSING CHECKS
2621     - rectangular case not covered (it is not allowed by MATIS)
2622   */
2623   /* get info from mat */
2624   /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */
2625   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2626   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2627   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2628 
2629   /* work */
2630   ierr = PetscMalloc1(local_rows,&local_indices);CHKERRQ(ierr);
2631   for (i=0;i<local_rows;i++) local_indices[i]=i;
2632   /* map indices of local mat to global values */
2633   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2634   /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */
2635   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2636 
2637   if (reuse==MAT_INITIAL_MATRIX) {
2638     Vec         vec_dnz,vec_onz;
2639     PetscScalar *my_dnz,*my_onz;
2640     PetscInt    *dnz,*onz,*mat_ranges,*row_ownership;
2641     PetscInt    index_col,owner;
2642     PetscMPIInt nsubdomains;
2643 
2644     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2645     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2646     ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2647     ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2648     ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr);
2649     ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2650     ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2651 
2652     /*
2653       preallocation
2654     */
2655 
2656     ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2657     /*
2658        Some vectors are needed to sum up properly on shared interface dofs.
2659        Preallocation macros cannot do the job.
2660        Note that preallocation is not exact, since it overestimates nonzeros
2661     */
2662     ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2663     /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */
2664     ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr);
2665     ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2666     /* All processes need to compute entire row ownership */
2667     ierr = PetscMalloc1(rows,&row_ownership);CHKERRQ(ierr);
2668     ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2669     for (i=0;i<nsubdomains;i++) {
2670       for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2671         row_ownership[j]=i;
2672       }
2673     }
2674 
2675     /*
2676        my_dnz and my_onz contains exact contribution to preallocation from each local mat
2677        then, they will be summed up properly. This way, preallocation is always sufficient
2678     */
2679     ierr = PetscMalloc1(local_rows,&my_dnz);CHKERRQ(ierr);
2680     ierr = PetscMalloc1(local_rows,&my_onz);CHKERRQ(ierr);
2681     ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2682     ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2683     for (i=0;i<local_rows;i++) {
2684       index_row = global_indices[i];
2685       for (j=i;j<local_rows;j++) {
2686         owner = row_ownership[index_row];
2687         index_col = global_indices[j];
2688         if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2689           my_dnz[i] += 1.0;
2690         } else { /* offdiag block */
2691           my_onz[i] += 1.0;
2692         }
2693         /* same as before, interchanging rows and cols */
2694         if (i != j) {
2695           owner = row_ownership[index_col];
2696           if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2697             my_dnz[j] += 1.0;
2698           } else {
2699             my_onz[j] += 1.0;
2700           }
2701         }
2702       }
2703     }
2704     ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2705     ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2706     if (local_rows) { /* multilevel guard */
2707       ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2708       ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2709     }
2710     ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2711     ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2712     ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2713     ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2714     ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2715     ierr = PetscFree(my_onz);CHKERRQ(ierr);
2716     ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2717 
2718     /* set computed preallocation in dnz and onz */
2719     ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2720     for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2721     ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2722     ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2723     for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2724     ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2725     ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2726     ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2727 
2728     /* Resize preallocation if overestimated */
2729     for (i=0;i<lrows;i++) {
2730       dnz[i] = PetscMin(dnz[i],lcols);
2731       onz[i] = PetscMin(onz[i],cols-lcols);
2732     }
2733     /* set preallocation */
2734     ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2735     for (i=0;i<lrows/bs;i++) {
2736       dnz[i] = dnz[i*bs]/bs;
2737       onz[i] = onz[i*bs]/bs;
2738     }
2739     ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2740     for (i=0;i<lrows/bs;i++) {
2741       dnz[i] = dnz[i]-i;
2742     }
2743     ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2744     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2745     *M = new_mat;
2746   } else {
2747     PetscInt mbs,mrows,mcols;
2748     /* some checks */
2749     ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr);
2750     ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr);
2751     if (mrows != rows) {
2752       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows);
2753     }
2754     if (mrows != rows) {
2755       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols);
2756     }
2757     if (mbs != bs) {
2758       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs);
2759     }
2760     ierr = MatZeroEntries(*M);CHKERRQ(ierr);
2761   }
2762   /* set local to global mappings */
2763   /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */
2764   /* Set values */
2765   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2766   if (isdense) { /* special case for dense local matrices */
2767     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2768     ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr);
2769     ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2770     ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr);
2771     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2772     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2773   } else { /* very basic values insertion for all other matrix types */
2774     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2775     for (i=0;i<local_rows;i++) {
2776       ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2777       /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */
2778       ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2779       ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2780       ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2781       ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2782     }
2783     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2784   }
2785   ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2786   ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2787   if (isdense) {
2788     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2789   }
2790   PetscFunctionReturn(0);
2791 }
2792 
2793 #undef __FUNCT__
2794 #define __FUNCT__ "MatISGetSubassemblingPattern"
2795 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
2796 {
2797   Mat             subdomain_adj;
2798   IS              new_ranks,ranks_send_to;
2799   MatPartitioning partitioner;
2800   Mat_IS          *matis;
2801   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2802   PetscInt        prank;
2803   PetscMPIInt     size,rank,color;
2804   PetscInt        *xadj,*adjncy,*oldranks;
2805   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2806   PetscInt        i,j,local_size,threshold=0;
2807   PetscErrorCode  ierr;
2808   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2809   PetscSubcomm    subcomm;
2810 
2811   PetscFunctionBegin;
2812   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2813   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2814   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2815 
2816   /* Get info on mapping */
2817   matis = (Mat_IS*)(mat->data);
2818   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2819   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2820 
2821   /* build local CSR graph of subdomains' connectivity */
2822   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
2823   xadj[0] = 0;
2824   xadj[1] = PetscMax(n_neighs-1,0);
2825   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
2826   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
2827 
2828   if (threshold) {
2829     PetscInt* count,min_threshold;
2830     ierr = PetscMalloc1(local_size,&count);CHKERRQ(ierr);
2831     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2832     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2833       for (j=0;j<n_shared[i];j++) {
2834         count[shared[i][j]] += 1;
2835       }
2836     }
2837     /* adapt threshold since we dont want to lose significant connections */
2838     min_threshold = n_neighs;
2839     for (i=1;i<n_neighs;i++) {
2840       for (j=0;j<n_shared[i];j++) {
2841         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2842       }
2843     }
2844     threshold = PetscMax(min_threshold+1,threshold);
2845     xadj[1] = 0;
2846     for (i=1;i<n_neighs;i++) {
2847       for (j=0;j<n_shared[i];j++) {
2848         if (count[shared[i][j]] < threshold) {
2849           adjncy[xadj[1]] = neighs[i];
2850           adjncy_wgt[xadj[1]] = n_shared[i];
2851           xadj[1]++;
2852           break;
2853         }
2854       }
2855     }
2856     ierr = PetscFree(count);CHKERRQ(ierr);
2857   } else {
2858     if (xadj[1]) {
2859       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2860       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2861     }
2862   }
2863   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2864   if (use_square) {
2865     for (i=0;i<xadj[1];i++) {
2866       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2867     }
2868   }
2869   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2870 
2871   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2872 
2873   /*
2874     Restrict work on active processes only.
2875   */
2876   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2877   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2878   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2879   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2880   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2881   if (color) {
2882     ierr = PetscFree(xadj);CHKERRQ(ierr);
2883     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2884     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2885   } else {
2886     PetscInt coarsening_ratio;
2887     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2888     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
2889     prank = rank;
2890     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2891     /*
2892     for (i=0;i<size;i++) {
2893       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2894     }
2895     */
2896     for (i=0;i<xadj[1];i++) {
2897       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2898     }
2899     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2900     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2901     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
2902 
2903     /* Partition */
2904     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2905     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2906     if (use_vwgt) {
2907       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2908       v_wgt[0] = local_size;
2909       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2910     }
2911     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
2912     coarsening_ratio = size/n_subdomains;
2913     /* Parmetis does not always give back nparts with small graphs! this should be taken into account */
2914     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
2915     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2916     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2917     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
2918 
2919     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2920     if (contiguous) {
2921       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
2922     } else {
2923       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
2924     }
2925     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2926     /* clean up */
2927     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2928     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2929     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2930     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2931   }
2932   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2933 
2934   /* assemble parallel IS for sends */
2935   i = 1;
2936   if (color) i=0;
2937   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2938 
2939   /* get back IS */
2940   *is_sends = ranks_send_to;
2941   PetscFunctionReturn(0);
2942 }
2943 
2944 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2945 
2946 #undef __FUNCT__
2947 #define __FUNCT__ "MatISSubassemble"
2948 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
2949 {
2950   Mat                    local_mat;
2951   Mat_IS                 *matis;
2952   IS                     is_sends_internal;
2953   PetscInt               rows,cols;
2954   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
2955   PetscBool              ismatis,isdense,destroy_mat;
2956   ISLocalToGlobalMapping l2gmap;
2957   PetscInt*              l2gmap_indices;
2958   const PetscInt*        is_indices;
2959   MatType                new_local_type;
2960   /* buffers */
2961   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2962   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
2963   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2964   /* MPI */
2965   MPI_Comm               comm,comm_n;
2966   PetscSubcomm           subcomm;
2967   PetscMPIInt            n_sends,n_recvs,commsize;
2968   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
2969   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
2970   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
2971   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
2972   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
2973   PetscErrorCode         ierr;
2974 
2975   PetscFunctionBegin;
2976   /* TODO: add missing checks */
2977   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
2978   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
2979   PetscValidLogicalCollectiveEnum(mat,reuse,5);
2980   PetscValidLogicalCollectiveInt(mat,nis,7);
2981   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2982   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
2983   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2984   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2985   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2986   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2987   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2988   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
2989     PetscInt mrows,mcols,mnrows,mncols;
2990     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
2991     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
2992     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
2993     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
2994     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
2995     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
2996   }
2997   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2998   PetscValidLogicalCollectiveInt(mat,bs,0);
2999   /* prepare IS for sending if not provided */
3000   if (!is_sends) {
3001     PetscBool pcontig = PETSC_TRUE;
3002     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3003     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3004   } else {
3005     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3006     is_sends_internal = is_sends;
3007   }
3008 
3009   /* get pointer of MATIS data */
3010   matis = (Mat_IS*)mat->data;
3011 
3012   /* get comm */
3013   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3014 
3015   /* compute number of sends */
3016   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3017   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3018 
3019   /* compute number of receives */
3020   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3021   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3022   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3023   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3024   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3025   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3026   ierr = PetscFree(iflags);CHKERRQ(ierr);
3027 
3028   /* restrict comm if requested */
3029   subcomm = 0;
3030   destroy_mat = PETSC_FALSE;
3031   if (restrict_comm) {
3032     PetscMPIInt color,rank,subcommsize;
3033     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3034     color = 0;
3035     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3036     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3037     subcommsize = commsize - subcommsize;
3038     /* check if reuse has been requested */
3039     if (reuse == MAT_REUSE_MATRIX) {
3040       if (*mat_n) {
3041         PetscMPIInt subcommsize2;
3042         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3043         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3044         comm_n = PetscObjectComm((PetscObject)*mat_n);
3045       } else {
3046         comm_n = PETSC_COMM_SELF;
3047       }
3048     } else { /* MAT_INITIAL_MATRIX */
3049       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3050       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3051       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3052       comm_n = subcomm->comm;
3053     }
3054     /* flag to destroy *mat_n if not significative */
3055     if (color) destroy_mat = PETSC_TRUE;
3056   } else {
3057     comm_n = comm;
3058   }
3059 
3060   /* prepare send/receive buffers */
3061   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3062   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3063   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3064   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3065   if (nis) {
3066     ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr);
3067     ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr);
3068   }
3069 
3070   /* Get data from local matrices */
3071   if (!isdense) {
3072     /* TODO: See below some guidelines on how to prepare the local buffers */
3073     /*
3074        send_buffer_vals should contain the raw values of the local matrix
3075        send_buffer_idxs should contain:
3076        - MatType_PRIVATE type
3077        - PetscInt        size_of_l2gmap
3078        - PetscInt        global_row_indices[size_of_l2gmap]
3079        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3080     */
3081   } else {
3082     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3083     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3084     ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr);
3085     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3086     send_buffer_idxs[1] = i;
3087     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3088     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3089     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3090     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3091     for (i=0;i<n_sends;i++) {
3092       ilengths_vals[is_indices[i]] = len*len;
3093       ilengths_idxs[is_indices[i]] = len+2;
3094     }
3095   }
3096   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3097   /* additional is (if any) */
3098   if (nis) {
3099     PetscMPIInt psum;
3100     PetscInt j;
3101     for (j=0,psum=0;j<nis;j++) {
3102       PetscInt plen;
3103       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3104       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3105       psum += len+1; /* indices + lenght */
3106     }
3107     ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr);
3108     for (j=0,psum=0;j<nis;j++) {
3109       PetscInt plen;
3110       const PetscInt *is_array_idxs;
3111       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3112       send_buffer_idxs_is[psum] = plen;
3113       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3114       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3115       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3116       psum += plen+1; /* indices + lenght */
3117     }
3118     for (i=0;i<n_sends;i++) {
3119       ilengths_idxs_is[is_indices[i]] = psum;
3120     }
3121     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3122   }
3123 
3124   buf_size_idxs = 0;
3125   buf_size_vals = 0;
3126   buf_size_idxs_is = 0;
3127   for (i=0;i<n_recvs;i++) {
3128     buf_size_idxs += (PetscInt)olengths_idxs[i];
3129     buf_size_vals += (PetscInt)olengths_vals[i];
3130     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3131   }
3132   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3133   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3134   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3135 
3136   /* get new tags for clean communications */
3137   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3138   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3139   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3140 
3141   /* allocate for requests */
3142   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3143   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3144   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3145   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3146   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3147   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3148 
3149   /* communications */
3150   ptr_idxs = recv_buffer_idxs;
3151   ptr_vals = recv_buffer_vals;
3152   ptr_idxs_is = recv_buffer_idxs_is;
3153   for (i=0;i<n_recvs;i++) {
3154     source_dest = onodes[i];
3155     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3156     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3157     ptr_idxs += olengths_idxs[i];
3158     ptr_vals += olengths_vals[i];
3159     if (nis) {
3160       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3161       ptr_idxs_is += olengths_idxs_is[i];
3162     }
3163   }
3164   for (i=0;i<n_sends;i++) {
3165     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3166     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3167     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3168     if (nis) {
3169       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3170     }
3171   }
3172   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3173   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3174 
3175   /* assemble new l2g map */
3176   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3177   ptr_idxs = recv_buffer_idxs;
3178   buf_size_idxs = 0;
3179   for (i=0;i<n_recvs;i++) {
3180     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3181     ptr_idxs += olengths_idxs[i];
3182   }
3183   ierr = PetscMalloc1(buf_size_idxs,&l2gmap_indices);CHKERRQ(ierr);
3184   ptr_idxs = recv_buffer_idxs;
3185   buf_size_idxs = 0;
3186   for (i=0;i<n_recvs;i++) {
3187     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3188     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3189     ptr_idxs += olengths_idxs[i];
3190   }
3191   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
3192   ierr = ISLocalToGlobalMappingCreate(comm_n,1,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3193   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3194 
3195   /* infer new local matrix type from received local matrices type */
3196   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3197   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3198   if (n_recvs) {
3199     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3200     ptr_idxs = recv_buffer_idxs;
3201     for (i=0;i<n_recvs;i++) {
3202       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3203         new_local_type_private = MATAIJ_PRIVATE;
3204         break;
3205       }
3206       ptr_idxs += olengths_idxs[i];
3207     }
3208     switch (new_local_type_private) {
3209       case MATDENSE_PRIVATE:
3210         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3211           new_local_type = MATSEQAIJ;
3212           bs = 1;
3213         } else { /* if I receive only 1 dense matrix */
3214           new_local_type = MATSEQDENSE;
3215           bs = 1;
3216         }
3217         break;
3218       case MATAIJ_PRIVATE:
3219         new_local_type = MATSEQAIJ;
3220         bs = 1;
3221         break;
3222       case MATBAIJ_PRIVATE:
3223         new_local_type = MATSEQBAIJ;
3224         break;
3225       case MATSBAIJ_PRIVATE:
3226         new_local_type = MATSEQSBAIJ;
3227         break;
3228       default:
3229         SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3230         break;
3231     }
3232   } else { /* by default, new_local_type is seqdense */
3233     new_local_type = MATSEQDENSE;
3234     bs = 1;
3235   }
3236 
3237   /* create MATIS object if needed */
3238   if (reuse == MAT_INITIAL_MATRIX) {
3239     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3240     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3241   } else {
3242     /* it also destroys the local matrices */
3243     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3244   }
3245   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3246   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3247   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3248   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
3249 
3250   /* set values */
3251   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3252   ptr_vals = recv_buffer_vals;
3253   ptr_idxs = recv_buffer_idxs;
3254   for (i=0;i<n_recvs;i++) {
3255     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3256       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3257       ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3258       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3259       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3260       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3261     } else {
3262       /* TODO */
3263     }
3264     ptr_idxs += olengths_idxs[i];
3265     ptr_vals += olengths_vals[i];
3266   }
3267   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3268   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3269   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3270   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3271 
3272 #if 0
3273   if (!restrict_comm) { /* check */
3274     Vec       lvec,rvec;
3275     PetscReal infty_error;
3276 
3277     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3278     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3279     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3280     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3281     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3282     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3283     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3284     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3285     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3286   }
3287 #endif
3288 
3289   /* assemble new additional is (if any) */
3290   if (nis) {
3291     PetscInt **temp_idxs,*count_is,j,psum;
3292 
3293     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3294     ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr);
3295     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3296     ptr_idxs = recv_buffer_idxs_is;
3297     psum = 0;
3298     for (i=0;i<n_recvs;i++) {
3299       for (j=0;j<nis;j++) {
3300         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3301         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3302         psum += plen;
3303         ptr_idxs += plen+1; /* shift pointer to received data */
3304       }
3305     }
3306     ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr);
3307     ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr);
3308     for (i=1;i<nis;i++) {
3309       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3310     }
3311     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3312     ptr_idxs = recv_buffer_idxs_is;
3313     for (i=0;i<n_recvs;i++) {
3314       for (j=0;j<nis;j++) {
3315         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3316         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3317         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3318         ptr_idxs += plen+1; /* shift pointer to received data */
3319       }
3320     }
3321     for (i=0;i<nis;i++) {
3322       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3323       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3324       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3325     }
3326     ierr = PetscFree(count_is);CHKERRQ(ierr);
3327     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3328     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3329   }
3330   /* free workspace */
3331   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3332   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3333   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3334   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3335   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3336   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3337   if (isdense) {
3338     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3339     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3340   } else {
3341     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3342   }
3343   if (nis) {
3344     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3345     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3346   }
3347   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3348   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3349   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3350   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3351   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3352   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3353   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3354   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3355   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3356   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3357   ierr = PetscFree(onodes);CHKERRQ(ierr);
3358   if (nis) {
3359     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3360     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3361     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3362   }
3363   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3364   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3365     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3366     for (i=0;i<nis;i++) {
3367       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3368     }
3369   }
3370   PetscFunctionReturn(0);
3371 }
3372 
3373 /* temporary hack into ksp private data structure */
3374 #include <petsc-private/kspimpl.h>
3375 
3376 #undef __FUNCT__
3377 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3378 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3379 {
3380   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3381   PC_IS                  *pcis = (PC_IS*)pc->data;
3382   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3383   MatNullSpace           CoarseNullSpace=NULL;
3384   ISLocalToGlobalMapping coarse_islg;
3385   IS                     coarse_is,*isarray;
3386   PetscInt               i,im_active=-1,active_procs=-1;
3387   PetscInt               nis,nisdofs,nisneu;
3388   PC                     pc_temp;
3389   PCType                 coarse_pc_type;
3390   KSPType                coarse_ksp_type;
3391   PetscBool              multilevel_requested,multilevel_allowed;
3392   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3393   Mat                    t_coarse_mat_is;
3394   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3395   PetscMPIInt            all_procs;
3396   PetscBool              csin_ml,csin_ds,csin,csin_type_simple;
3397   PetscBool              compute_vecs = PETSC_FALSE;
3398   PetscErrorCode         ierr;
3399 
3400   PetscFunctionBegin;
3401   /* Assign global numbering to coarse dofs */
3402   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3403     compute_vecs = PETSC_TRUE;
3404     PetscInt ocoarse_size;
3405     ocoarse_size = pcbddc->coarse_size;
3406     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3407     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3408     /* see if we can avoid some work */
3409     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3410       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3411         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3412         coarse_reuse = PETSC_FALSE;
3413       } else { /* we can safely reuse already computed coarse matrix */
3414         coarse_reuse = PETSC_TRUE;
3415       }
3416     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3417       coarse_reuse = PETSC_FALSE;
3418     }
3419     /* reset any subassembling information */
3420     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3421     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3422   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3423     coarse_reuse = PETSC_TRUE;
3424   }
3425 
3426   /* count "active" (i.e. with positive local size) and "void" processes */
3427   im_active = !!(pcis->n);
3428   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3429   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3430   void_procs = all_procs-active_procs;
3431   csin_type_simple = PETSC_TRUE;
3432   if (pcbddc->current_level) {
3433     csin_ml = PETSC_TRUE;
3434     ncoarse_ml = void_procs;
3435     csin_ds = PETSC_TRUE;
3436     ncoarse_ds = void_procs;
3437     if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
3438   } else {
3439     csin_ml = PETSC_FALSE;
3440     ncoarse_ml = all_procs;
3441     if (void_procs) {
3442       csin_ds = PETSC_TRUE;
3443       ncoarse_ds = void_procs;
3444       csin_type_simple = PETSC_FALSE;
3445     } else {
3446       csin_ds = PETSC_FALSE;
3447       ncoarse_ds = all_procs;
3448     }
3449   }
3450 
3451   /*
3452     test if we can go multilevel: three conditions must be satisfied:
3453     - we have not exceeded the number of levels requested
3454     - we can actually subassemble the active processes
3455     - we can find a suitable number of MPI processes where we can place the subassembled problem
3456   */
3457   multilevel_allowed = PETSC_FALSE;
3458   multilevel_requested = PETSC_FALSE;
3459   if (pcbddc->current_level < pcbddc->max_levels) {
3460     multilevel_requested = PETSC_TRUE;
3461     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3462       multilevel_allowed = PETSC_FALSE;
3463     } else {
3464       multilevel_allowed = PETSC_TRUE;
3465     }
3466   }
3467   /* determine number of process partecipating to coarse solver */
3468   if (multilevel_allowed) {
3469     ncoarse = ncoarse_ml;
3470     csin = csin_ml;
3471   } else {
3472     ncoarse = ncoarse_ds;
3473     csin = csin_ds;
3474   }
3475 
3476   /* creates temporary l2gmap and IS for coarse indexes */
3477   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3478   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3479 
3480   /* creates temporary MATIS object for coarse matrix */
3481   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3482 #if 0
3483   {
3484     PetscViewer viewer;
3485     char filename[256];
3486     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3487     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3488     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3489     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3490     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3491   }
3492 #endif
3493   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3494   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3495   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3496   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3497   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3498 
3499   /* compute dofs splitting and neumann boundaries for coarse dofs */
3500   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3501     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3502     const PetscInt         *idxs;
3503     ISLocalToGlobalMapping tmap;
3504 
3505     /* create map between primal indices (in local representative ordering) and local primal numbering */
3506     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3507     /* allocate space for temporary storage */
3508     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr);
3509     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr);
3510     /* allocate for IS array */
3511     nisdofs = pcbddc->n_ISForDofsLocal;
3512     nisneu = !!pcbddc->NeumannBoundariesLocal;
3513     nis = nisdofs + nisneu;
3514     ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr);
3515     /* dofs splitting */
3516     for (i=0;i<nisdofs;i++) {
3517       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3518       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3519       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3520       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3521       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3522       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3523       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3524       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3525     }
3526     /* neumann boundaries */
3527     if (pcbddc->NeumannBoundariesLocal) {
3528       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3529       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3530       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3531       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3532       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3533       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3534       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3535       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3536     }
3537     /* free memory */
3538     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3539     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3540     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3541   } else {
3542     nis = 0;
3543     nisdofs = 0;
3544     nisneu = 0;
3545     isarray = NULL;
3546   }
3547   /* destroy no longer needed map */
3548   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3549 
3550   /* restrict on coarse candidates (if needed) */
3551   coarse_mat_is = NULL;
3552   if (csin) {
3553     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3554       PetscInt j,tissize,*nisindices;
3555       PetscInt *coarse_candidates;
3556       const PetscInt* tisindices;
3557       /* get coarse candidates' ranks in pc communicator */
3558       ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr);
3559       ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3560       for (i=0,j=0;i<all_procs;i++) {
3561         if (!coarse_candidates[i]) {
3562           coarse_candidates[j]=i;
3563           j++;
3564         }
3565       }
3566       if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
3567       /* get a suitable subassembling pattern */
3568       if (csin_type_simple) {
3569         PetscMPIInt rank;
3570         PetscInt    issize,isidx;
3571         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3572         if (im_active) {
3573           issize = 1;
3574           isidx = (PetscInt)rank;
3575         } else {
3576           issize = 0;
3577           isidx = -1;
3578         }
3579         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3580       } else {
3581         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3582       }
3583       if (pcbddc->dbg_flag) {
3584         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3585         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
3586         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3587         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
3588         for (i=0;i<j;i++) {
3589           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
3590         }
3591         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
3592         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3593       }
3594       /* shift the pattern on coarse candidates */
3595       ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
3596       ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3597       ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr);
3598       for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
3599       ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
3600       ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
3601       ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
3602     }
3603     if (pcbddc->dbg_flag) {
3604       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3605       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
3606       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3607       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3608     }
3609     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
3610     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
3611   } else {
3612     if (pcbddc->dbg_flag) {
3613       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3614       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
3615       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3616     }
3617     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
3618     coarse_mat_is = t_coarse_mat_is;
3619   }
3620 
3621   /* create local to global scatters for coarse problem */
3622   if (compute_vecs) {
3623     PetscInt lrows;
3624     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3625     if (coarse_mat_is) {
3626       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
3627     } else {
3628       lrows = 0;
3629     }
3630     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
3631     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
3632     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
3633     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3634     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3635   }
3636   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3637   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
3638 
3639   /* set defaults for coarse KSP and PC */
3640   if (multilevel_allowed) {
3641     coarse_ksp_type = KSPRICHARDSON;
3642     coarse_pc_type = PCBDDC;
3643   } else {
3644     coarse_ksp_type = KSPPREONLY;
3645     coarse_pc_type = PCREDUNDANT;
3646   }
3647 
3648   /* print some info if requested */
3649   if (pcbddc->dbg_flag) {
3650     if (!multilevel_allowed) {
3651       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3652       if (multilevel_requested) {
3653         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3654       } else if (pcbddc->max_levels) {
3655         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3656       }
3657       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3658     }
3659   }
3660 
3661   /* create the coarse KSP object only once with defaults */
3662   if (coarse_mat_is) {
3663     MatReuse coarse_mat_reuse;
3664     PetscViewer dbg_viewer = NULL;
3665     if (pcbddc->dbg_flag) {
3666       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
3667       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3668     }
3669     if (!pcbddc->coarse_ksp) {
3670       char prefix[256],str_level[16];
3671       size_t len;
3672       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3673       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3674       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3675       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
3676       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3677       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3678       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3679       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3680       /* prefix */
3681       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3682       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3683       if (!pcbddc->current_level) {
3684         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3685         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3686       } else {
3687         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3688         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
3689         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
3690         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
3691         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
3692         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3693       }
3694       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3695       /* allow user customization */
3696       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3697       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
3698     }
3699 
3700     /* get some info after set from options */
3701     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3702     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3703     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3704     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
3705     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
3706       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3707       isbddc = PETSC_FALSE;
3708     }
3709     if (isredundant) {
3710       KSP inner_ksp;
3711       PC inner_pc;
3712       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
3713       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
3714       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
3715     }
3716 
3717     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
3718     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3719     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3720     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3721     if (nisdofs) {
3722       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
3723       for (i=0;i<nisdofs;i++) {
3724         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3725       }
3726     }
3727     if (nisneu) {
3728       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
3729       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
3730     }
3731 
3732     /* assemble coarse matrix */
3733     if (coarse_reuse) {
3734       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
3735       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3736       coarse_mat_reuse = MAT_REUSE_MATRIX;
3737     } else {
3738       coarse_mat_reuse = MAT_INITIAL_MATRIX;
3739     }
3740     if (isbddc || isnn) {
3741       if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3742         ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3743         if (pcbddc->dbg_flag) {
3744           ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3745           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
3746           ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
3747           ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3748         }
3749       }
3750       ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
3751     } else {
3752       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
3753     }
3754     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3755 
3756     /* propagate symmetry info to coarse matrix */
3757     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
3758     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3759 
3760     /* set operators */
3761     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3762     if (pcbddc->dbg_flag) {
3763       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
3764     }
3765   } else { /* processes non partecipating to coarse solver (if any) */
3766     coarse_mat = 0;
3767   }
3768   ierr = PetscFree(isarray);CHKERRQ(ierr);
3769 #if 0
3770   {
3771     PetscViewer viewer;
3772     char filename[256];
3773     sprintf(filename,"coarse_mat.m");
3774     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
3775     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3776     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
3777     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3778   }
3779 #endif
3780 
3781   /* Compute coarse null space (special handling by BDDC only) */
3782   if (pcbddc->NullSpace) {
3783     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3784   }
3785 
3786   if (pcbddc->coarse_ksp) {
3787     Vec crhs,csol;
3788     PetscBool ispreonly;
3789     if (CoarseNullSpace) {
3790       if (isbddc) {
3791         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3792       } else {
3793         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3794       }
3795     }
3796     /* setup coarse ksp */
3797     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3798     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
3799     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
3800     /* hack */
3801     if (!csol) {
3802       ierr = MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
3803     }
3804     if (!crhs) {
3805       ierr = MatGetVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
3806     }
3807     /* Check coarse problem if in debug mode or if solving with an iterative method */
3808     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3809     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
3810       KSP       check_ksp;
3811       KSPType   check_ksp_type;
3812       PC        check_pc;
3813       Vec       check_vec,coarse_vec;
3814       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
3815       PetscInt  its;
3816       PetscBool compute_eigs;
3817       PetscReal *eigs_r,*eigs_c;
3818       PetscInt  neigs;
3819       const char *prefix;
3820 
3821       /* Create ksp object suitable for estimation of extreme eigenvalues */
3822       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
3823       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
3824       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3825       if (ispreonly) {
3826         check_ksp_type = KSPPREONLY;
3827         compute_eigs = PETSC_FALSE;
3828       } else {
3829         check_ksp_type = KSPGMRES;
3830         compute_eigs = PETSC_TRUE;
3831       }
3832       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3833       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
3834       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
3835       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
3836       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
3837       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
3838       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
3839       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
3840       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3841       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3842       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3843       /* create random vec */
3844       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
3845       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
3846       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3847       if (CoarseNullSpace) {
3848         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3849       }
3850       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
3851       /* solve coarse problem */
3852       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
3853       if (CoarseNullSpace) {
3854         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
3855       }
3856       /* set eigenvalue estimation if preonly has not been requested */
3857       if (compute_eigs) {
3858         ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr);
3859         ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr);
3860         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
3861         lambda_max = eigs_r[neigs-1];
3862         lambda_min = eigs_r[0];
3863         if (pcbddc->use_coarse_estimates) {
3864           if (lambda_max>lambda_min) {
3865             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
3866             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
3867           }
3868         }
3869       }
3870 
3871       /* check coarse problem residual error */
3872       if (pcbddc->dbg_flag) {
3873         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
3874         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
3875         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
3876         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3877         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
3878         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3879         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3880         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
3881         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
3882         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
3883         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3884         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3885         if (compute_eigs) {
3886           PetscReal lambda_max_s,lambda_min_s;
3887           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3888           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
3889           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
3890           for (i=0;i<neigs;i++) {
3891             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
3892           }
3893         }
3894         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
3895         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
3896       }
3897       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3898       if (compute_eigs) {
3899         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
3900         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
3901       }
3902     }
3903   }
3904   /* print additional info */
3905   if (pcbddc->dbg_flag) {
3906     /* waits until all processes reaches this point */
3907     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
3908     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3909     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3910   }
3911 
3912   /* free memory */
3913   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3914   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3915   PetscFunctionReturn(0);
3916 }
3917 
3918 #undef __FUNCT__
3919 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3920 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3921 {
3922   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3923   PC_IS*         pcis = (PC_IS*)pc->data;
3924   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3925   PetscInt       i,coarse_size;
3926   PetscInt       *local_primal_indices;
3927   PetscErrorCode ierr;
3928 
3929   PetscFunctionBegin;
3930   /* Compute global number of coarse dofs */
3931   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
3932     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
3933   }
3934   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3935 
3936   /* check numbering */
3937   if (pcbddc->dbg_flag) {
3938     PetscScalar coarsesum,*array;
3939     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
3940 
3941     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3942     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3943     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3944     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
3945     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3946     for (i=0;i<pcbddc->local_primal_size;i++) {
3947       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3948     }
3949     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3950     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3951     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3952     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3953     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3954     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3955     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3956     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3957     for (i=0;i<pcis->n;i++) {
3958       if (array[i] == 1.0) {
3959         set_error = PETSC_TRUE;
3960         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3961       }
3962     }
3963     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3964     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3965     for (i=0;i<pcis->n;i++) {
3966       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3967     }
3968     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3969     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3970     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3971     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3972     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3973     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3974     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
3975       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3976       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3977       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3978       for (i=0;i<pcbddc->local_primal_size;i++) {
3979         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
3980       }
3981       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3982     }
3983     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3984     if (set_error_reduced) {
3985       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
3986     }
3987   }
3988   /* get back data */
3989   *coarse_size_n = coarse_size;
3990   *local_primal_indices_n = local_primal_indices;
3991   PetscFunctionReturn(0);
3992 }
3993 
3994 #undef __FUNCT__
3995 #define __FUNCT__ "PCBDDCGlobalToLocal"
3996 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
3997 {
3998   IS             localis_t;
3999   PetscInt       i,lsize,*idxs,n;
4000   PetscScalar    *vals;
4001   PetscErrorCode ierr;
4002 
4003   PetscFunctionBegin;
4004   /* get indices in local ordering exploiting local to global map */
4005   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4006   ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
4007   for (i=0;i<lsize;i++) vals[i] = 1.0;
4008   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4009   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4010   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4011   if (idxs) { /* multilevel guard */
4012     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4013   }
4014   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4015   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4016   ierr = PetscFree(vals);CHKERRQ(ierr);
4017   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4018   /* now compute set in local ordering */
4019   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4020   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4021   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4022   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4023   for (i=0,lsize=0;i<n;i++) {
4024     if (PetscRealPart(vals[i]) > 0.5) {
4025       lsize++;
4026     }
4027   }
4028   ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr);
4029   for (i=0,lsize=0;i<n;i++) {
4030     if (PetscRealPart(vals[i]) > 0.5) {
4031       idxs[lsize++] = i;
4032     }
4033   }
4034   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4035   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4036   *localis = localis_t;
4037   PetscFunctionReturn(0);
4038 }
4039