xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision c688d0420b4e513ff34944d1e1ad7d4e50aafa8d)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscReal       *eigs,thresh;
20   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21   PetscBool       allocated_S_St;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
29 
30   if (pcbddc->dbg_flag) {
31     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
32     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
33     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
35   }
36 
37   if (pcbddc->dbg_flag) {
38     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
39   }
40 
41   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
42 
43   /* max size of subsets */
44   mss = 0;
45   for (i=0;i<sub_schurs->n_subs;i++) {
46     PetscInt subset_size;
47 
48     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
49     mss = PetscMax(mss,subset_size);
50   }
51 
52   /* min/max and threshold */
53   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
54   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
55   nmax = PetscMax(nmin,nmax);
56   allocated_S_St = PETSC_FALSE;
57   if (nmin) {
58     allocated_S_St = PETSC_TRUE;
59   }
60 
61   /* allocate lapack workspace */
62   cum = cum2 = 0;
63   maxneigs = 0;
64   for (i=0;i<sub_schurs->n_subs;i++) {
65     PetscInt n,subset_size;
66 
67     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
68     n = PetscMin(subset_size,nmax);
69     cum += subset_size;
70     cum2 += subset_size*n;
71     maxneigs = PetscMax(maxneigs,n);
72   }
73   if (mss) {
74     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
75       PetscBLASInt B_itype = 1;
76       PetscBLASInt B_N = mss;
77       PetscReal    zero = 0.0;
78       PetscReal    eps = 0.0; /* dlamch? */
79 
80       B_lwork = -1;
81       S = NULL;
82       St = NULL;
83       eigs = NULL;
84       eigv = NULL;
85       B_iwork = NULL;
86       B_ifail = NULL;
87 #if defined(PETSC_USE_COMPLEX)
88       rwork = NULL;
89 #endif
90       thresh = 1.0;
91       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
92 #if defined(PETSC_USE_COMPLEX)
93       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
94 #else
95       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
96 #endif
97       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
98       ierr = PetscFPTrapPop();CHKERRQ(ierr);
99     } else {
100         /* TODO */
101     }
102   } else {
103     lwork = 0;
104   }
105 
106   nv = 0;
107   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
108     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
109   }
110   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
111   if (allocated_S_St) {
112     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
113   }
114   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
115 #if defined(PETSC_USE_COMPLEX)
116   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
117 #endif
118   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
119                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
120                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
121                       nv+cum,&pcbddc->adaptive_constraints_idxs,
122                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
123   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
124 
125   maxneigs = 0;
126   cum = cumarray = 0;
127   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
128   pcbddc->adaptive_constraints_data_ptr[0] = 0;
129   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
130     const PetscInt *idxs;
131 
132     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
133     for (cum=0;cum<nv;cum++) {
134       pcbddc->adaptive_constraints_n[cum] = 1;
135       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
136       pcbddc->adaptive_constraints_data[cum] = 1.0;
137       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
138       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
139     }
140     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
141   }
142 
143   if (mss) { /* multilevel */
144     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
145     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
146   }
147 
148   for (i=0;i<sub_schurs->n_subs;i++) {
149 
150     const PetscInt *idxs;
151     PetscReal      infty = PETSC_MAX_REAL;
152     PetscInt       j,subset_size,eigs_start = 0;
153     PetscBLASInt   B_N;
154     PetscBool      same_data = PETSC_FALSE;
155 
156     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
157     ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
158     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
159       if (sub_schurs->is_hermitian) {
160         PetscInt j,k;
161         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
162           ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
163           ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
164         }
165         for (j=0;j<subset_size;j++) {
166           for (k=j;k<subset_size;k++) {
167             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
168             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
169           }
170         }
171       } else {
172         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
173         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
174       }
175     } else {
176       S = Sarray + cumarray;
177       St = Starray + cumarray;
178     }
179 
180     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
181     /* see if we can save some work */
182     if (sub_schurs->n_subs == 1) {
183       ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
184     }
185 
186     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
187       B_neigs = 0;
188     } else {
189       /* Threshold: this is an heuristic for edges */
190       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
191 
192       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
193         PetscBLASInt B_itype = 1;
194         PetscBLASInt B_IL, B_IU;
195         PetscReal    eps = -1.0; /* dlamch? */
196         PetscInt     nmin_s;
197 
198         /* ask for eigenvalues larger than thresh */
199         if (pcbddc->dbg_flag) {
200           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
201         }
202         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
203 #if defined(PETSC_USE_COMPLEX)
204         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
205 #else
206         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
207 #endif
208         ierr = PetscFPTrapPop();CHKERRQ(ierr);
209         if (B_ierr) {
210           if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
211           else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
212           else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
213         }
214 
215         if (B_neigs > nmax) {
216           if (pcbddc->dbg_flag) {
217             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
218           }
219           eigs_start = B_neigs -nmax;
220           B_neigs = nmax;
221         }
222 
223         nmin_s = PetscMin(nmin,B_N);
224         if (B_neigs < nmin_s) {
225           PetscBLASInt B_neigs2;
226 
227           B_IU = B_N - B_neigs;
228           B_IL = B_N - nmin_s + 1;
229           if (pcbddc->dbg_flag) {
230             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
231           }
232           if (sub_schurs->is_hermitian) {
233             PetscInt j;
234             for (j=0;j<subset_size;j++) {
235               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
236             }
237             for (j=0;j<subset_size;j++) {
238               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
239             }
240           } else {
241             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
242             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
243           }
244           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
245 #if defined(PETSC_USE_COMPLEX)
246           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
247 #else
248           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
249 #endif
250           ierr = PetscFPTrapPop();CHKERRQ(ierr);
251           B_neigs += B_neigs2;
252         }
253         if (B_ierr) {
254           if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
255           else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
256           else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
257         }
258         if (pcbddc->dbg_flag) {
259           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);CHKERRQ(ierr);
260           for (j=0;j<B_neigs;j++) {
261             if (eigs[j] == 0.0) {
262               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");CHKERRQ(ierr);
263             } else {
264               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr);
265             }
266           }
267         }
268       } else {
269           /* TODO */
270       }
271     }
272     maxneigs = PetscMax(B_neigs,maxneigs);
273     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
274     if (B_neigs) {
275       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
276 
277       if (pcbddc->dbg_flag > 1) {
278         PetscInt ii;
279         for (ii=0;ii<B_neigs;ii++) {
280           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr);
281           for (j=0;j<B_N;j++) {
282 #if defined(PETSC_USE_COMPLEX)
283             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
284             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
285             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr);
286 #else
287             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr);
288 #endif
289           }
290         }
291       }
292 #if 0
293       for (j=0;j<B_neigs;j++) {
294         PetscBLASInt Blas_N,Blas_one = 1.0;
295         PetscScalar norm;
296         ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
297         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
298                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
299         if (pcbddc->adaptive_constraints_data[cum] > 0.0) {
300           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
301         } else {
302           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
303         }
304         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
305       }
306 #endif
307       ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
308       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
309       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
310       cum++;
311     }
312     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
313     /* shift for next computation */
314     cumarray += subset_size*subset_size;
315   }
316   if (pcbddc->dbg_flag) {
317     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
318   }
319 
320   if (mss) {
321     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
322     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
323     /* destroy matrices (junk) */
324     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
325     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
326   }
327   if (allocated_S_St) {
328     ierr = PetscFree2(S,St);CHKERRQ(ierr);
329   }
330   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
331 #if defined(PETSC_USE_COMPLEX)
332   ierr = PetscFree(rwork);CHKERRQ(ierr);
333 #endif
334   if (pcbddc->dbg_flag) {
335     PetscInt maxneigs_r;
336     ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
337     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
338   }
339   PetscFunctionReturn(0);
340 }
341 
342 #undef __FUNCT__
343 #define __FUNCT__ "PCBDDCSetUpSolvers"
344 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
345 {
346   PetscScalar    *coarse_submat_vals;
347   PetscErrorCode ierr;
348 
349   PetscFunctionBegin;
350   /* Setup local scatters R_to_B and (optionally) R_to_D */
351   /* PCBDDCSetUpLocalWorkVectors should be called first! */
352   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
353 
354   /* Setup local neumann solver ksp_R */
355   /* PCBDDCSetUpLocalScatters should be called first! */
356   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
357 
358   /*
359      Setup local correction and local part of coarse basis.
360      Gives back the dense local part of the coarse matrix in column major ordering
361   */
362   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
363 
364   /* Compute total number of coarse nodes and setup coarse solver */
365   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
366 
367   /* free */
368   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
369   PetscFunctionReturn(0);
370 }
371 
372 #undef __FUNCT__
373 #define __FUNCT__ "PCBDDCResetCustomization"
374 PetscErrorCode PCBDDCResetCustomization(PC pc)
375 {
376   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
377   PetscErrorCode ierr;
378 
379   PetscFunctionBegin;
380   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
381   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
382   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
383   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
384   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
385   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
386   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
387   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
388   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
389   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
390   PetscFunctionReturn(0);
391 }
392 
393 #undef __FUNCT__
394 #define __FUNCT__ "PCBDDCResetTopography"
395 PetscErrorCode PCBDDCResetTopography(PC pc)
396 {
397   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
398   PetscErrorCode ierr;
399 
400   PetscFunctionBegin;
401   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
402   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
403   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
404   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
405   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "PCBDDCResetSolvers"
411 PetscErrorCode PCBDDCResetSolvers(PC pc)
412 {
413   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
414   PetscScalar    *array;
415   PetscErrorCode ierr;
416 
417   PetscFunctionBegin;
418   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
419   if (pcbddc->coarse_phi_B) {
420     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
421     ierr = PetscFree(array);CHKERRQ(ierr);
422   }
423   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
424   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
425   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
426   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
427   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
428   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
429   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
430   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
431   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
432   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
433   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
434   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
435   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
436   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
437   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
438   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
439   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
440   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
441   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
442   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
443   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
444   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
445   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
446   PetscFunctionReturn(0);
447 }
448 
449 #undef __FUNCT__
450 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
451 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
452 {
453   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
454   PC_IS          *pcis = (PC_IS*)pc->data;
455   VecType        impVecType;
456   PetscInt       n_constraints,n_R,old_size;
457   PetscErrorCode ierr;
458 
459   PetscFunctionBegin;
460   if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
461   /* get sizes */
462   n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices;
463   n_R = pcis->n-pcbddc->n_vertices;
464   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
465   /* local work vectors (try to avoid unneeded work)*/
466   /* R nodes */
467   old_size = -1;
468   if (pcbddc->vec1_R) {
469     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
470   }
471   if (n_R != old_size) {
472     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
473     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
474     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
475     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
476     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
477     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
478   }
479   /* local primal dofs */
480   old_size = -1;
481   if (pcbddc->vec1_P) {
482     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
483   }
484   if (pcbddc->local_primal_size != old_size) {
485     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
486     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
487     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
488     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
489   }
490   /* local explicit constraints */
491   old_size = -1;
492   if (pcbddc->vec1_C) {
493     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
494   }
495   if (n_constraints && n_constraints != old_size) {
496     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
497     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
498     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
499     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
500   }
501   PetscFunctionReturn(0);
502 }
503 
504 #undef __FUNCT__
505 #define __FUNCT__ "PCBDDCSetUpCorrection"
506 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
507 {
508   PetscErrorCode  ierr;
509   /* pointers to pcis and pcbddc */
510   PC_IS*          pcis = (PC_IS*)pc->data;
511   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
512   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
513   /* submatrices of local problem */
514   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
515   /* submatrices of local coarse problem */
516   Mat             S_VV,S_CV,S_VC,S_CC;
517   /* working matrices */
518   Mat             C_CR;
519   /* additional working stuff */
520   PC              pc_R;
521   Mat             F;
522   PetscBool       isLU,isCHOL,isILU;
523 
524   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
525   PetscScalar     *work;
526   PetscInt        *idx_V_B;
527   PetscInt        n,n_vertices,n_constraints;
528   PetscInt        i,n_R,n_D,n_B;
529   PetscBool       unsymmetric_check;
530   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
531   MatType         impMatType;
532   /* some shortcuts to scalars */
533   PetscScalar     one=1.0,m_one=-1.0;
534 
535   PetscFunctionBegin;
536   n_vertices = pcbddc->n_vertices;
537   n_constraints = pcbddc->local_primal_size-n_vertices;
538   /* Set Non-overlapping dimensions */
539   n_B = pcis->n_B;
540   n_D = pcis->n - n_B;
541   n_R = pcis->n - n_vertices;
542 
543   /* Set types for local objects needed by BDDC precondtioner */
544   impMatType = MATSEQDENSE;
545 
546   /* vertices in boundary numbering */
547   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
548   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
549   if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i);
550 
551   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
552   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
553   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
554   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
555   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
556   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
557   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
558   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
559   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
560   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
561 
562   unsymmetric_check = PETSC_FALSE;
563   /* allocate workspace */
564   n = 0;
565   if (n_constraints) {
566     n += n_R*n_constraints;
567   }
568   if (n_vertices) {
569     n = PetscMax(2*n_R*n_vertices,n);
570     n = PetscMax((n_R+n_B)*n_vertices,n);
571   }
572   if (!pcbddc->symmetric_primal) {
573     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
574     unsymmetric_check = PETSC_TRUE;
575   }
576   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
577 
578   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
579   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
580   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
581   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
582   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
583   if (isLU || isILU || isCHOL) {
584     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
585   } else if (sub_schurs->reuse_mumps) {
586     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
587     MatFactorType type;
588 
589     F = reuse_mumps->F;
590     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
591     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
592   } else {
593     F = NULL;
594   }
595 
596   /* Precompute stuffs needed for preprocessing and application of BDDC*/
597   if (n_constraints) {
598     Mat         M1,M2,M3;
599     Mat         auxmat;
600     IS          is_aux;
601     PetscScalar *array,*array2;
602 
603     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
604     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
605 
606     /* Extract constraints on R nodes: C_{CR}  */
607     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
608     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
609     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
610 
611     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
612     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
613     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
614     for (i=0;i<n_constraints;i++) {
615       const PetscScalar *row_cmat_values;
616       const PetscInt    *row_cmat_indices;
617       PetscInt          size_of_constraint,j;
618 
619       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
620       for (j=0;j<size_of_constraint;j++) {
621         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
622       }
623       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
624     }
625     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
626     if (F) {
627       Mat B;
628 
629       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
630       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
631       ierr = MatDestroy(&B);CHKERRQ(ierr);
632     } else {
633       PetscScalar *marr;
634 
635       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
636       for (i=0;i<n_constraints;i++) {
637         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
638         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
639         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
640         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
641         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
642       }
643       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
644     }
645     if (!pcbddc->switch_static) {
646       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
647       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
648       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
649       for (i=0;i<n_constraints;i++) {
650         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
651         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
652         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
653         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
654         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
655         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
656       }
657       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
658       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
659       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
660     } else {
661       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
662       pcbddc->local_auxmat2 = local_auxmat2_R;
663       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
664     }
665     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
666     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
667     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
668     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
669     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
670     if (isCHOL) {
671       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
672     } else {
673       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
674     }
675     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
676     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
677     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
678     ierr = MatDestroy(&M2);CHKERRQ(ierr);
679     ierr = MatDestroy(&M3);CHKERRQ(ierr);
680     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
681     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
682     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
683     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
684     ierr = MatDestroy(&M1);CHKERRQ(ierr);
685   }
686   /* Get submatrices from subdomain matrix */
687   if (n_vertices) {
688     IS is_aux;
689 
690     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
691       IS tis;
692 
693       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
694       ierr = ISSort(tis);CHKERRQ(ierr);
695       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
696       ierr = ISDestroy(&tis);CHKERRQ(ierr);
697     } else {
698       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
699     }
700     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
701     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
702     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
703     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
704   }
705 
706   /* Matrix of coarse basis functions (local) */
707   if (pcbddc->coarse_phi_B) {
708     PetscInt on_B,on_primal,on_D=n_D;
709     if (pcbddc->coarse_phi_D) {
710       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
711     }
712     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
713     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
714       PetscScalar *marray;
715 
716       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
717       ierr = PetscFree(marray);CHKERRQ(ierr);
718       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
719       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
720       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
721       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
722     }
723   }
724 
725   if (!pcbddc->coarse_phi_B) {
726     PetscScalar *marray;
727 
728     n = n_B*pcbddc->local_primal_size;
729     if (pcbddc->switch_static || pcbddc->dbg_flag) {
730       n += n_D*pcbddc->local_primal_size;
731     }
732     if (!pcbddc->symmetric_primal) {
733       n *= 2;
734     }
735     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
736     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
737     n = n_B*pcbddc->local_primal_size;
738     if (pcbddc->switch_static || pcbddc->dbg_flag) {
739       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
740       n += n_D*pcbddc->local_primal_size;
741     }
742     if (!pcbddc->symmetric_primal) {
743       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
744       if (pcbddc->switch_static || pcbddc->dbg_flag) {
745         n = n_B*pcbddc->local_primal_size;
746         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
747       }
748     } else {
749       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
750       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
751       if (pcbddc->switch_static || pcbddc->dbg_flag) {
752         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
753         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
754       }
755     }
756   }
757   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
758   /* vertices */
759   if (n_vertices) {
760 
761     ierr = MatConvert(A_VV,impMatType,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr);
762 
763     if (n_R) {
764       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
765       PetscBLASInt B_N,B_one = 1;
766       PetscScalar  *x,*y;
767       PetscBool    isseqaij;
768 
769       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
770       ierr = MatConvert(A_RV,impMatType,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr);
771       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
772       if (F) { /* TODO could be optimized for symmetric problems */
773         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
774       } else {
775         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
776         for (i=0;i<n_vertices;i++) {
777           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
778           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
779           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
780           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
781           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
782         }
783         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
784       }
785       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
786       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
787       if (n_constraints) {
788         Mat B;
789 
790         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
791         for (i=0;i<n_vertices;i++) {
792           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
793           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
794           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
795           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
796           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
797           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
798         }
799         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
800         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
801         ierr = MatDestroy(&B);CHKERRQ(ierr);
802         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
803         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
804         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
805         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
806         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
807         ierr = MatDestroy(&B);CHKERRQ(ierr);
808       }
809       ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
810       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
811         ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr);
812       }
813       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
814       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
815       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
816       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
817       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
818       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
819       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
820       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
821       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
822       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
823     } else {
824       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
825     }
826     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
827     /* coarse basis functions */
828     for (i=0;i<n_vertices;i++) {
829       PetscScalar *y;
830 
831       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
832       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
833       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
834       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
835       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
836       y[n_B*i+idx_V_B[i]] = 1.0;
837       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
838       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
839 
840       if (pcbddc->switch_static || pcbddc->dbg_flag) {
841         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
842         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
843         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
844         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
845         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
846         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
847       }
848       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
849     }
850     /* if n_R == 0 the object is not destroyed */
851     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
852   }
853 
854   if (n_constraints) {
855     Mat B;
856 
857     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
858     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
859     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
860     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
861     if (n_vertices) {
862       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
863         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
864       } else {
865         Mat S_VCt;
866 
867         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
868         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
869         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
870       }
871     }
872     ierr = MatDestroy(&B);CHKERRQ(ierr);
873     /* coarse basis functions */
874     for (i=0;i<n_constraints;i++) {
875       PetscScalar *y;
876 
877       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
878       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
879       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
880       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
881       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
882       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
883       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
884       if (pcbddc->switch_static || pcbddc->dbg_flag) {
885         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
886         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
887         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
888         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
889         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
890         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
891       }
892       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
893     }
894   }
895   if (n_constraints) {
896     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
897   }
898 
899   /* compute other basis functions for non-symmetric problems */
900   if (!pcbddc->symmetric_primal) {
901 
902     if (n_constraints) {
903       Mat S_CCT,B_C;
904 
905       /* this is a lazy thing */
906       ierr = MatConvert(C_CR,impMatType,MAT_INPLACE_MATRIX,&C_CR);CHKERRQ(ierr);
907       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
908       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
909       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
910       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
911       if (n_vertices) {
912         Mat B_V,S_VCT;
913 
914         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
915         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
916         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
917         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
918         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
919       }
920       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
921     } else { /* if there are no constraints, reset work */
922       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
923     }
924     if (n_vertices && n_R) {
925       Mat          A_VRT;
926       PetscScalar  *marray;
927       PetscBLASInt B_N,B_one = 1;
928 
929       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
930       ierr = MatConvert(A_VRT,impMatType,MAT_INPLACE_MATRIX,&A_VRT);CHKERRQ(ierr);
931       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
932       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
933       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
934       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
935       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
936     }
937 
938     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
939       for (i=0;i<pcbddc->local_primal_size;i++) {
940         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
941         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
942         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
943         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
944         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
945       }
946     } else {
947       for (i=0;i<pcbddc->local_primal_size;i++) {
948         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
949         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
950         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
951         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
952         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
953       }
954     }
955     /* coarse basis functions */
956     for (i=0;i<pcbddc->local_primal_size;i++) {
957       PetscScalar *y;
958 
959       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
960       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
961       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
962       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
964       if (i<n_vertices) {
965         y[n_B*i+idx_V_B[i]] = 1.0;
966       }
967       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
968       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
969 
970       if (pcbddc->switch_static || pcbddc->dbg_flag) {
971         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
972         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
973         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
974         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
975         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
976         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
977       }
978       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
979     }
980   }
981   /* free memory */
982   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
983   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
984   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
985   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
986   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
987   ierr = PetscFree(work);CHKERRQ(ierr);
988   if (n_vertices) {
989     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
990   }
991   if (n_constraints) {
992     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
993   }
994   /* Checking coarse_sub_mat and coarse basis functios */
995   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
996   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
997   if (pcbddc->dbg_flag) {
998     Mat         coarse_sub_mat;
999     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1000     Mat         coarse_phi_D,coarse_phi_B;
1001     Mat         coarse_psi_D,coarse_psi_B;
1002     Mat         A_II,A_BB,A_IB,A_BI;
1003     Mat         C_B,CPHI;
1004     IS          is_dummy;
1005     Vec         mones;
1006     MatType     checkmattype=MATSEQAIJ;
1007     PetscReal   real_value;
1008 
1009     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1010     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1011     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1012     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1013     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1014     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1015     if (unsymmetric_check) {
1016       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1017       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1018     }
1019     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1020 
1021     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1022     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1023     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1024     if (unsymmetric_check) {
1025       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1026       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1027       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1028       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1029       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1030       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1031       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1032       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1033       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1034       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1035       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1036       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1037     } else {
1038       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1039       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1040       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1041       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1042       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1043       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1044       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1045       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1046     }
1047     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1048     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1049     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1050     ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr);
1051     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1052     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1053     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
1054     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1055 
1056     /* check constraints */
1057     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1058     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr);
1059     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1060     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1061     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1062     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1063     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1064     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1065     if (unsymmetric_check) {
1066       ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1067       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1068       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1069       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1070       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1071     }
1072     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1073     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1074     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1075     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1076 
1077     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1078     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1079     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1080     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1081     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1082     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1083     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1084     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1085     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1086     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1087     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1088     if (unsymmetric_check) {
1089       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1090       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1091     }
1092     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1093   }
1094   /* get back data */
1095   *coarse_submat_vals_n = coarse_submat_vals;
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1101 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1102 {
1103   Mat            *work_mat;
1104   IS             isrow_s,iscol_s;
1105   PetscBool      rsorted,csorted;
1106   PetscInt       rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;
1107   PetscErrorCode ierr;
1108 
1109   PetscFunctionBegin;
1110   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1111   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1112   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1113   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1114 
1115   if (!rsorted) {
1116     const PetscInt *idxs;
1117     PetscInt *idxs_sorted,i;
1118 
1119     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1120     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1121     for (i=0;i<rsize;i++) {
1122       idxs_perm_r[i] = i;
1123     }
1124     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1125     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1126     for (i=0;i<rsize;i++) {
1127       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1128     }
1129     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1130     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1131   } else {
1132     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1133     isrow_s = isrow;
1134   }
1135 
1136   if (!csorted) {
1137     if (isrow == iscol) {
1138       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1139       iscol_s = isrow_s;
1140     } else {
1141       const PetscInt *idxs;
1142       PetscInt       *idxs_sorted,i;
1143 
1144       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1145       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1146       for (i=0;i<csize;i++) {
1147         idxs_perm_c[i] = i;
1148       }
1149       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1150       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1151       for (i=0;i<csize;i++) {
1152         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1153       }
1154       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1155       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1156     }
1157   } else {
1158     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1159     iscol_s = iscol;
1160   }
1161 
1162   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1163 
1164   if (!rsorted || !csorted) {
1165     Mat      new_mat;
1166     IS       is_perm_r,is_perm_c;
1167 
1168     if (!rsorted) {
1169       PetscInt *idxs_r,i;
1170       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1171       for (i=0;i<rsize;i++) {
1172         idxs_r[idxs_perm_r[i]] = i;
1173       }
1174       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1175       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1176     } else {
1177       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1178     }
1179     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1180 
1181     if (!csorted) {
1182       if (isrow_s == iscol_s) {
1183         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1184         is_perm_c = is_perm_r;
1185       } else {
1186         PetscInt *idxs_c,i;
1187         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1188         for (i=0;i<csize;i++) {
1189           idxs_c[idxs_perm_c[i]] = i;
1190         }
1191         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1192         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1193       }
1194     } else {
1195       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1196     }
1197     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1198 
1199     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1200     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1201     work_mat[0] = new_mat;
1202     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1203     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1204   }
1205 
1206   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1207   *B = work_mat[0];
1208   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1209   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1210   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 #undef __FUNCT__
1215 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1216 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1217 {
1218   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1219   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1220   Mat            new_mat;
1221   IS             is_local,is_global;
1222   PetscInt       local_size;
1223   PetscBool      isseqaij;
1224   PetscErrorCode ierr;
1225 
1226   PetscFunctionBegin;
1227   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1228   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1229   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1230   ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr);
1231   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1232   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1233   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1234 
1235   /* check */
1236   if (pcbddc->dbg_flag) {
1237     Vec       x,x_change;
1238     PetscReal error;
1239 
1240     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1241     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1242     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1243     ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1244     ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1246     ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1247     ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1248     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1249     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1250     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1251     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1252     ierr = VecDestroy(&x);CHKERRQ(ierr);
1253     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1254   }
1255 
1256   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1257   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1258   if (isseqaij) {
1259     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1260   } else {
1261     Mat work_mat;
1262     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1263     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1264     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1265   }
1266   if (matis->A->symmetric_set) {
1267     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
1268 #if !defined(PETSC_USE_COMPLEX)
1269     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
1270 #endif
1271   }
1272   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1278 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1279 {
1280   PC_IS*          pcis = (PC_IS*)(pc->data);
1281   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1282   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1283   PetscInt        *idx_R_local=NULL;
1284   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1285   PetscInt        vbs,bs;
1286   PetscBT         bitmask=NULL;
1287   PetscErrorCode  ierr;
1288 
1289   PetscFunctionBegin;
1290   /*
1291     No need to setup local scatters if
1292       - primal space is unchanged
1293         AND
1294       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1295         AND
1296       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1297   */
1298   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1299     PetscFunctionReturn(0);
1300   }
1301   /* destroy old objects */
1302   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1303   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1304   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1305   /* Set Non-overlapping dimensions */
1306   n_B = pcis->n_B;
1307   n_D = pcis->n - n_B;
1308   n_vertices = pcbddc->n_vertices;
1309 
1310   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1311 
1312   /* create auxiliary bitmask and allocate workspace */
1313   if (!sub_schurs->reuse_mumps) {
1314     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1315     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1316     for (i=0;i<n_vertices;i++) {
1317       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1318     }
1319 
1320     for (i=0, n_R=0; i<pcis->n; i++) {
1321       if (!PetscBTLookup(bitmask,i)) {
1322         idx_R_local[n_R++] = i;
1323       }
1324     }
1325   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1326     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1327 
1328     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1329     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
1330   }
1331 
1332   /* Block code */
1333   vbs = 1;
1334   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1335   if (bs>1 && !(n_vertices%bs)) {
1336     PetscBool is_blocked = PETSC_TRUE;
1337     PetscInt  *vary;
1338     if (!sub_schurs->reuse_mumps) {
1339       ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1340       ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1341       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1342       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
1343       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1344       for (i=0; i<pcis->n/bs; i++) {
1345         if (vary[i]!=0 && vary[i]!=bs) {
1346           is_blocked = PETSC_FALSE;
1347           break;
1348         }
1349       }
1350       ierr = PetscFree(vary);CHKERRQ(ierr);
1351     } else {
1352       /* Verify directly the R set */
1353       for (i=0; i<n_R/bs; i++) {
1354         PetscInt j,node=idx_R_local[bs*i];
1355         for (j=1; j<bs; j++) {
1356           if (node != idx_R_local[bs*i+j]-j) {
1357             is_blocked = PETSC_FALSE;
1358             break;
1359           }
1360         }
1361       }
1362     }
1363     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1364       vbs = bs;
1365       for (i=0;i<n_R/vbs;i++) {
1366         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1367       }
1368     }
1369   }
1370   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1371   if (sub_schurs->reuse_mumps) {
1372     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1373 
1374     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1375     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
1376     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
1377     reuse_mumps->is_R = pcbddc->is_R_local;
1378   } else {
1379     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1380   }
1381 
1382   /* print some info if requested */
1383   if (pcbddc->dbg_flag) {
1384     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1385     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1386     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
1387     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1388     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1389     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1390     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1391   }
1392 
1393   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1394   if (!sub_schurs->reuse_mumps) {
1395     IS       is_aux1,is_aux2;
1396     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1397 
1398     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1399     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1400     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1401     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1402     for (i=0; i<n_D; i++) {
1403       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1404     }
1405     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1406     for (i=0, j=0; i<n_R; i++) {
1407       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1408         aux_array1[j++] = i;
1409       }
1410     }
1411     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1412     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1413     for (i=0, j=0; i<n_B; i++) {
1414       if (!PetscBTLookup(bitmask,is_indices[i])) {
1415         aux_array2[j++] = i;
1416       }
1417     }
1418     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1419     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1420     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1421     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1422     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1423 
1424     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1425       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1426       for (i=0, j=0; i<n_R; i++) {
1427         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1428           aux_array1[j++] = i;
1429         }
1430       }
1431       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1432       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1433       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1434     }
1435     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1436     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1437   } else {
1438     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1439     IS               tis;
1440     PetscInt         schur_size;
1441 
1442     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
1443     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
1444     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
1445     ierr = ISDestroy(&tis);CHKERRQ(ierr);
1446     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1447       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
1448       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1449       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1450     }
1451   }
1452   PetscFunctionReturn(0);
1453 }
1454 
1455 
1456 #undef __FUNCT__
1457 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1458 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1459 {
1460   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1461   PC_IS          *pcis = (PC_IS*)pc->data;
1462   PC             pc_temp;
1463   Mat            A_RR;
1464   MatReuse       reuse;
1465   PetscScalar    m_one = -1.0;
1466   PetscReal      value;
1467   PetscInt       n_D,n_R;
1468   PetscBool      check_corr[2],issbaij;
1469   PetscErrorCode ierr;
1470   /* prefixes stuff */
1471   char           dir_prefix[256],neu_prefix[256],str_level[16];
1472   size_t         len;
1473 
1474   PetscFunctionBegin;
1475 
1476   /* compute prefixes */
1477   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1478   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1479   if (!pcbddc->current_level) {
1480     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1481     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1482     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1483     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1484   } else {
1485     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1486     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1487     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1488     len -= 15; /* remove "pc_bddc_coarse_" */
1489     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1490     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1491     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1492     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1493     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1494     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1495     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1496     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1497   }
1498 
1499   /* DIRICHLET PROBLEM */
1500   if (dirichlet) {
1501     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1502     if (pcbddc->local_mat->symmetric_set) {
1503       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1504     }
1505     /* Matrix for Dirichlet problem is pcis->A_II */
1506     n_D = pcis->n - pcis->n_B;
1507     if (!pcbddc->ksp_D) { /* create object if not yet build */
1508       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1509       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1510       /* default */
1511       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1512       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1513       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1514       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1515       if (issbaij) {
1516         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1517       } else {
1518         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1519       }
1520       /* Allow user's customization */
1521       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1522       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1523     }
1524     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1525     if (sub_schurs->reuse_mumps) {
1526       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1527 
1528       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
1529     }
1530     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1531     if (!n_D) {
1532       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1533       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1534     }
1535     /* Set Up KSP for Dirichlet problem of BDDC */
1536     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1537     /* set ksp_D into pcis data */
1538     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1539     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1540     pcis->ksp_D = pcbddc->ksp_D;
1541   }
1542 
1543   /* NEUMANN PROBLEM */
1544   A_RR = 0;
1545   if (neumann) {
1546     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1547     PetscInt        ibs,mbs;
1548     PetscBool       issbaij;
1549     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
1550     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1551     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1552     if (pcbddc->ksp_R) { /* already created ksp */
1553       PetscInt nn_R;
1554       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1555       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1556       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1557       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1558         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1559         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1560         reuse = MAT_INITIAL_MATRIX;
1561       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1562         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1563           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1564           reuse = MAT_INITIAL_MATRIX;
1565         } else { /* safe to reuse the matrix */
1566           reuse = MAT_REUSE_MATRIX;
1567         }
1568       }
1569       /* last check */
1570       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1571         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1572         reuse = MAT_INITIAL_MATRIX;
1573       }
1574     } else { /* first time, so we need to create the matrix */
1575       reuse = MAT_INITIAL_MATRIX;
1576     }
1577     /* extract A_RR */
1578     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1579     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1580     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1581     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
1582       if (matis->A == pcbddc->local_mat) {
1583         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1584         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1585       } else {
1586         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1587       }
1588     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
1589       if (matis->A == pcbddc->local_mat) {
1590         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1591         ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1592       } else {
1593         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1594       }
1595     }
1596     if (!sub_schurs->reuse_mumps) {
1597       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1598       if (pcbddc->local_mat->symmetric_set) {
1599         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1600       }
1601     } else {
1602       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1603 
1604       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1605       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
1606       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1607     }
1608     if (!pcbddc->ksp_R) { /* create object if not present */
1609       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1610       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1611       /* default */
1612       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1613       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1614       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1615       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1616       if (issbaij) {
1617         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1618       } else {
1619         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1620       }
1621       /* Allow user's customization */
1622       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1623       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1624     }
1625     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1626     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1627     if (!n_R) {
1628       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1629       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1630     }
1631     /* Reuse MUMPS solver if it is present */
1632     if (sub_schurs->reuse_mumps) {
1633       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1634 
1635       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
1636     }
1637     /* Set Up KSP for Neumann problem of BDDC */
1638     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1639   }
1640   /* free Neumann problem's matrix */
1641   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1642 
1643   if (pcbddc->dbg_flag) {
1644     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1645     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
1646     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1647   }
1648 
1649   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
1650   check_corr[0] = check_corr[1] = PETSC_FALSE;
1651   if (pcbddc->NullSpace_corr[0]) {
1652     ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr);
1653   }
1654   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) {
1655     check_corr[0] = PETSC_TRUE;
1656     ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr);
1657   }
1658   if (neumann && pcbddc->NullSpace_corr[2]) {
1659     check_corr[1] = PETSC_TRUE;
1660     ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr);
1661   }
1662 
1663   /* check Dirichlet and Neumann solvers */
1664   if (pcbddc->dbg_flag) {
1665     if (dirichlet) { /* Dirichlet */
1666       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1667       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1668       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1669       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1670       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1671       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1672       if (check_corr[0]) {
1673         ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr);
1674       }
1675       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1676     }
1677     if (neumann) { /* Neumann */
1678       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
1679       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1680       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1681       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1682       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1683       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1684       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1685       if (check_corr[1]) {
1686         ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr);
1687       }
1688       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1689     }
1690   }
1691   PetscFunctionReturn(0);
1692 }
1693 
1694 #undef __FUNCT__
1695 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1696 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1697 {
1698   PetscErrorCode  ierr;
1699   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1700   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1701 
1702   PetscFunctionBegin;
1703   if (!sub_schurs->reuse_mumps) {
1704     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
1705   }
1706   if (!pcbddc->switch_static) {
1707     if (applytranspose && pcbddc->local_auxmat1) {
1708       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1709       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1710     }
1711     if (!sub_schurs->reuse_mumps) {
1712       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1713       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1714     } else {
1715       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1716 
1717       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1718       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1719     }
1720   } else {
1721     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1722     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1723     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1724     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1725     if (applytranspose && pcbddc->local_auxmat1) {
1726       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
1727       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1728       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1729       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1730     }
1731   }
1732   if (!sub_schurs->reuse_mumps) {
1733     if (applytranspose) {
1734       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1735     } else {
1736       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1737     }
1738   } else {
1739     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1740 
1741     if (applytranspose) {
1742       ierr = MatFactorSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1743     } else {
1744       ierr = MatFactorSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1745     }
1746   }
1747   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
1748   if (!pcbddc->switch_static) {
1749     if (!sub_schurs->reuse_mumps) {
1750       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1751       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1752     } else {
1753       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1754 
1755       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1756       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1757     }
1758     if (!applytranspose && pcbddc->local_auxmat1) {
1759       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1760       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1761     }
1762   } else {
1763     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1764     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1765     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1766     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1767     if (!applytranspose && pcbddc->local_auxmat1) {
1768       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1769       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1770     }
1771     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1772     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1773     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1774     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1775   }
1776   PetscFunctionReturn(0);
1777 }
1778 
1779 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1780 #undef __FUNCT__
1781 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1782 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1783 {
1784   PetscErrorCode ierr;
1785   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1786   PC_IS*            pcis = (PC_IS*)  (pc->data);
1787   const PetscScalar zero = 0.0;
1788 
1789   PetscFunctionBegin;
1790   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1791   if (applytranspose) {
1792     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1793     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1794   } else {
1795     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1796     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1797   }
1798   /* start communications from local primal nodes to rhs of coarse solver */
1799   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1800   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1801   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1802 
1803   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1804   /* TODO remove null space when doing multilevel */
1805   if (pcbddc->coarse_ksp) {
1806     Vec rhs,sol;
1807 
1808     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1809     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1810     if (applytranspose) {
1811       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1812     } else {
1813       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1814     }
1815   }
1816 
1817   /* Local solution on R nodes */
1818   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
1819     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
1820   }
1821 
1822   /* communications from coarse sol to local primal nodes */
1823   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1824   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1825 
1826   /* Sum contributions from two levels */
1827   if (applytranspose) {
1828     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1829     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1830   } else {
1831     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1832     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1833   }
1834   PetscFunctionReturn(0);
1835 }
1836 
1837 #undef __FUNCT__
1838 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1839 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1840 {
1841   PetscErrorCode ierr;
1842   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1843   PetscScalar    *array;
1844   Vec            from,to;
1845 
1846   PetscFunctionBegin;
1847   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1848     from = pcbddc->coarse_vec;
1849     to = pcbddc->vec1_P;
1850     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1851       Vec tvec;
1852 
1853       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1854       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1855       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1856       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1857       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1858       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1859     }
1860   } else { /* from local to global -> put data in coarse right hand side */
1861     from = pcbddc->vec1_P;
1862     to = pcbddc->coarse_vec;
1863   }
1864   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1865   PetscFunctionReturn(0);
1866 }
1867 
1868 #undef __FUNCT__
1869 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1870 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1871 {
1872   PetscErrorCode ierr;
1873   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1874   PetscScalar    *array;
1875   Vec            from,to;
1876 
1877   PetscFunctionBegin;
1878   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1879     from = pcbddc->coarse_vec;
1880     to = pcbddc->vec1_P;
1881   } else { /* from local to global -> put data in coarse right hand side */
1882     from = pcbddc->vec1_P;
1883     to = pcbddc->coarse_vec;
1884   }
1885   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1886   if (smode == SCATTER_FORWARD) {
1887     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1888       Vec tvec;
1889 
1890       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1891       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1892       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1893       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1894     }
1895   } else {
1896     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1897      ierr = VecResetArray(from);CHKERRQ(ierr);
1898     }
1899   }
1900   PetscFunctionReturn(0);
1901 }
1902 
1903 /* uncomment for testing purposes */
1904 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1905 #undef __FUNCT__
1906 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1907 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1908 {
1909   PetscErrorCode    ierr;
1910   PC_IS*            pcis = (PC_IS*)(pc->data);
1911   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1912   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1913   /* one and zero */
1914   PetscScalar       one=1.0,zero=0.0;
1915   /* space to store constraints and their local indices */
1916   PetscScalar       *constraints_data;
1917   PetscInt          *constraints_idxs,*constraints_idxs_B;
1918   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
1919   PetscInt          *constraints_n;
1920   /* iterators */
1921   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
1922   /* BLAS integers */
1923   PetscBLASInt      lwork,lierr;
1924   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1925   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1926   /* reuse */
1927   PetscInt          olocal_primal_size,olocal_primal_size_cc;
1928   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
1929   /* change of basis */
1930   PetscBool         qr_needed;
1931   PetscBT           change_basis,qr_needed_idx;
1932   /* auxiliary stuff */
1933   PetscInt          *nnz,*is_indices;
1934   PetscInt          ncc;
1935   /* some quantities */
1936   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1937   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1938 
1939   PetscFunctionBegin;
1940   /* Destroy Mat objects computed previously */
1941   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1942   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1943   /* save info on constraints from previous setup (if any) */
1944   olocal_primal_size = pcbddc->local_primal_size;
1945   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
1946   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
1947   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1948   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1949   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
1950   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1951 
1952   /* print some info */
1953   if (pcbddc->dbg_flag) {
1954     IS       vertices;
1955     PetscInt nv,nedges,nfaces;
1956     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1957     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1958     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1959     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
1960     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1961     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1962     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1963     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1964     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1965     ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
1966   }
1967 
1968   if (!pcbddc->adaptive_selection) {
1969     IS           ISForVertices,*ISForFaces,*ISForEdges;
1970     MatNullSpace nearnullsp;
1971     const Vec    *nearnullvecs;
1972     Vec          *localnearnullsp;
1973     PetscScalar  *array;
1974     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1975     PetscBool    nnsp_has_cnst;
1976     /* LAPACK working arrays for SVD or POD */
1977     PetscBool    skip_lapack,boolforchange;
1978     PetscScalar  *work;
1979     PetscReal    *singular_vals;
1980 #if defined(PETSC_USE_COMPLEX)
1981     PetscReal    *rwork;
1982 #endif
1983 #if defined(PETSC_MISSING_LAPACK_GESVD)
1984     PetscScalar  *temp_basis,*correlation_mat;
1985 #else
1986     PetscBLASInt dummy_int=1;
1987     PetscScalar  dummy_scalar=1.;
1988 #endif
1989 
1990     /* Get index sets for faces, edges and vertices from graph */
1991     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1992     /* free unneeded index sets */
1993     if (!pcbddc->use_vertices) {
1994       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1995     }
1996     if (!pcbddc->use_edges) {
1997       for (i=0;i<n_ISForEdges;i++) {
1998         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1999       }
2000       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2001       n_ISForEdges = 0;
2002     }
2003     if (!pcbddc->use_faces) {
2004       for (i=0;i<n_ISForFaces;i++) {
2005         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2006       }
2007       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2008       n_ISForFaces = 0;
2009     }
2010 
2011     /* check if near null space is attached to global mat */
2012     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2013     if (nearnullsp) {
2014       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2015       /* remove any stored info */
2016       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2017       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2018       /* store information for BDDC solver reuse */
2019       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2020       pcbddc->onearnullspace = nearnullsp;
2021       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2022       for (i=0;i<nnsp_size;i++) {
2023         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2024       }
2025     } else { /* if near null space is not provided BDDC uses constants by default */
2026       nnsp_size = 0;
2027       nnsp_has_cnst = PETSC_TRUE;
2028     }
2029     /* get max number of constraints on a single cc */
2030     max_constraints = nnsp_size;
2031     if (nnsp_has_cnst) max_constraints++;
2032 
2033     /*
2034          Evaluate maximum storage size needed by the procedure
2035          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2036          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2037          There can be multiple constraints per connected component
2038                                                                                                                                                            */
2039     n_vertices = 0;
2040     if (ISForVertices) {
2041       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2042     }
2043     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2044     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2045 
2046     total_counts = n_ISForFaces+n_ISForEdges;
2047     total_counts *= max_constraints;
2048     total_counts += n_vertices;
2049     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2050 
2051     total_counts = 0;
2052     max_size_of_constraint = 0;
2053     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2054       IS used_is;
2055       if (i<n_ISForEdges) {
2056         used_is = ISForEdges[i];
2057       } else {
2058         used_is = ISForFaces[i-n_ISForEdges];
2059       }
2060       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2061       total_counts += j;
2062       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2063     }
2064     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2065 
2066     /* get local part of global near null space vectors */
2067     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2068     for (k=0;k<nnsp_size;k++) {
2069       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2070       ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2071       ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2072     }
2073 
2074     /* whether or not to skip lapack calls */
2075     skip_lapack = PETSC_TRUE;
2076     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2077 
2078     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2079     if (!skip_lapack) {
2080       PetscScalar temp_work;
2081 
2082 #if defined(PETSC_MISSING_LAPACK_GESVD)
2083       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2084       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2085       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2086       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2087 #if defined(PETSC_USE_COMPLEX)
2088       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2089 #endif
2090       /* now we evaluate the optimal workspace using query with lwork=-1 */
2091       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2092       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2093       lwork = -1;
2094       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2095 #if !defined(PETSC_USE_COMPLEX)
2096       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2097 #else
2098       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2099 #endif
2100       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2101       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2102 #else /* on missing GESVD */
2103       /* SVD */
2104       PetscInt max_n,min_n;
2105       max_n = max_size_of_constraint;
2106       min_n = max_constraints;
2107       if (max_size_of_constraint < max_constraints) {
2108         min_n = max_size_of_constraint;
2109         max_n = max_constraints;
2110       }
2111       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2112 #if defined(PETSC_USE_COMPLEX)
2113       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2114 #endif
2115       /* now we evaluate the optimal workspace using query with lwork=-1 */
2116       lwork = -1;
2117       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2118       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2119       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2120       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2121 #if !defined(PETSC_USE_COMPLEX)
2122       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2123 #else
2124       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2125 #endif
2126       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2127       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2128 #endif /* on missing GESVD */
2129       /* Allocate optimal workspace */
2130       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2131       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2132     }
2133     /* Now we can loop on constraining sets */
2134     total_counts = 0;
2135     constraints_idxs_ptr[0] = 0;
2136     constraints_data_ptr[0] = 0;
2137     /* vertices */
2138     if (n_vertices) {
2139       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2140       if (nnsp_has_cnst) { /* it considers all possible vertices */
2141         ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2142         for (i=0;i<n_vertices;i++) {
2143           constraints_n[total_counts] = 1;
2144           constraints_data[total_counts] = 1.0;
2145           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2146           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2147           total_counts++;
2148         }
2149       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2150         PetscBool used_vertex;
2151         for (i=0;i<n_vertices;i++) {
2152           used_vertex = PETSC_FALSE;
2153           k = 0;
2154           while (!used_vertex && k<nnsp_size) {
2155             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2156             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2157               constraints_n[total_counts] = 1;
2158               constraints_idxs[total_counts] = is_indices[i];
2159               constraints_data[total_counts] = 1.0;
2160               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2161               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2162               total_counts++;
2163               used_vertex = PETSC_TRUE;
2164             }
2165             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2166             k++;
2167           }
2168         }
2169       }
2170       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2171       n_vertices = total_counts;
2172     }
2173 
2174     /* edges and faces */
2175     total_counts_cc = total_counts;
2176     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2177       IS        used_is;
2178       PetscBool idxs_copied = PETSC_FALSE;
2179 
2180       if (ncc<n_ISForEdges) {
2181         used_is = ISForEdges[ncc];
2182         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2183       } else {
2184         used_is = ISForFaces[ncc-n_ISForEdges];
2185         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2186       }
2187       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2188 
2189       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2190       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2191       /* change of basis should not be performed on local periodic nodes */
2192       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2193       if (nnsp_has_cnst) {
2194         PetscScalar quad_value;
2195 
2196         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2197         idxs_copied = PETSC_TRUE;
2198 
2199         if (!pcbddc->use_nnsp_true) {
2200           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2201         } else {
2202           quad_value = 1.0;
2203         }
2204         for (j=0;j<size_of_constraint;j++) {
2205           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2206         }
2207         temp_constraints++;
2208         total_counts++;
2209       }
2210       for (k=0;k<nnsp_size;k++) {
2211         PetscReal real_value;
2212         PetscScalar *ptr_to_data;
2213 
2214         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2215         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2216         for (j=0;j<size_of_constraint;j++) {
2217           ptr_to_data[j] = array[is_indices[j]];
2218         }
2219         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2220         /* check if array is null on the connected component */
2221         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2222         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2223         if (real_value > 0.0) { /* keep indices and values */
2224           temp_constraints++;
2225           total_counts++;
2226           if (!idxs_copied) {
2227             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2228             idxs_copied = PETSC_TRUE;
2229           }
2230         }
2231       }
2232       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2233       valid_constraints = temp_constraints;
2234       if (!pcbddc->use_nnsp_true && temp_constraints) {
2235         if (temp_constraints == 1) { /* just normalize the constraint */
2236           PetscScalar norm,*ptr_to_data;
2237 
2238           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2239           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2240           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2241           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2242           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2243         } else { /* perform SVD */
2244           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2245           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2246 
2247 #if defined(PETSC_MISSING_LAPACK_GESVD)
2248           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2249              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2250              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2251                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2252                 from that computed using LAPACKgesvd
2253              -> This is due to a different computation of eigenvectors in LAPACKheev
2254              -> The quality of the POD-computed basis will be the same */
2255           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2256           /* Store upper triangular part of correlation matrix */
2257           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2258           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2259           for (j=0;j<temp_constraints;j++) {
2260             for (k=0;k<j+1;k++) {
2261               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2262             }
2263           }
2264           /* compute eigenvalues and eigenvectors of correlation matrix */
2265           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2266           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2267 #if !defined(PETSC_USE_COMPLEX)
2268           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2269 #else
2270           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2271 #endif
2272           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2273           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2274           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2275           j = 0;
2276           while (j < temp_constraints && singular_vals[j] < tol) j++;
2277           total_counts = total_counts-j;
2278           valid_constraints = temp_constraints-j;
2279           /* scale and copy POD basis into used quadrature memory */
2280           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2281           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2282           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2283           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2284           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2285           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2286           if (j<temp_constraints) {
2287             PetscInt ii;
2288             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2289             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2290             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2291             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2292             for (k=0;k<temp_constraints-j;k++) {
2293               for (ii=0;ii<size_of_constraint;ii++) {
2294                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2295               }
2296             }
2297           }
2298 #else  /* on missing GESVD */
2299           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2300           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2301           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2302           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2303 #if !defined(PETSC_USE_COMPLEX)
2304           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2305 #else
2306           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2307 #endif
2308           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2309           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2310           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2311           k = temp_constraints;
2312           if (k > size_of_constraint) k = size_of_constraint;
2313           j = 0;
2314           while (j < k && singular_vals[k-j-1] < tol) j++;
2315           valid_constraints = k-j;
2316           total_counts = total_counts-temp_constraints+valid_constraints;
2317 #endif /* on missing GESVD */
2318         }
2319       }
2320       /* update pointers information */
2321       if (valid_constraints) {
2322         constraints_n[total_counts_cc] = valid_constraints;
2323         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2324         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2325         /* set change_of_basis flag */
2326         if (boolforchange) {
2327           PetscBTSet(change_basis,total_counts_cc);
2328         }
2329         total_counts_cc++;
2330       }
2331     }
2332     /* free workspace */
2333     if (!skip_lapack) {
2334       ierr = PetscFree(work);CHKERRQ(ierr);
2335 #if defined(PETSC_USE_COMPLEX)
2336       ierr = PetscFree(rwork);CHKERRQ(ierr);
2337 #endif
2338       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2339 #if defined(PETSC_MISSING_LAPACK_GESVD)
2340       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2341       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2342 #endif
2343     }
2344     for (k=0;k<nnsp_size;k++) {
2345       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2346     }
2347     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2348     /* free index sets of faces, edges and vertices */
2349     for (i=0;i<n_ISForFaces;i++) {
2350       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2351     }
2352     if (n_ISForFaces) {
2353       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2354     }
2355     for (i=0;i<n_ISForEdges;i++) {
2356       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2357     }
2358     if (n_ISForEdges) {
2359       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2360     }
2361     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2362   } else {
2363     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2364 
2365     total_counts = 0;
2366     n_vertices = 0;
2367     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2368       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
2369     }
2370     max_constraints = 0;
2371     total_counts_cc = 0;
2372     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2373       total_counts += pcbddc->adaptive_constraints_n[i];
2374       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2375       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2376     }
2377     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2378     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2379     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2380     constraints_data = pcbddc->adaptive_constraints_data;
2381     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2382     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2383     total_counts_cc = 0;
2384     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2385       if (pcbddc->adaptive_constraints_n[i]) {
2386         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2387       }
2388     }
2389 #if 0
2390     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2391     for (i=0;i<total_counts_cc;i++) {
2392       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2393       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2394       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2395         printf(" %d",constraints_idxs[j]);
2396       }
2397       printf("\n");
2398       printf("number of cc: %d\n",constraints_n[i]);
2399     }
2400     for (i=0;i<n_vertices;i++) {
2401       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2402     }
2403     for (i=0;i<sub_schurs->n_subs;i++) {
2404       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2405     }
2406 #endif
2407 
2408     max_size_of_constraint = 0;
2409     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2410     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2411     /* Change of basis */
2412     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2413     if (pcbddc->use_change_of_basis) {
2414       for (i=0;i<sub_schurs->n_subs;i++) {
2415         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2416           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2417         }
2418       }
2419     }
2420   }
2421   pcbddc->local_primal_size = total_counts;
2422   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2423 
2424   /* map constraints_idxs in boundary numbering */
2425   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2426   if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i);
2427 
2428   /* Create constraint matrix */
2429   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2430   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2431   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2432 
2433   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2434   /* determine if a QR strategy is needed for change of basis */
2435   qr_needed = PETSC_FALSE;
2436   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2437   total_primal_vertices=0;
2438   pcbddc->local_primal_size_cc = 0;
2439   for (i=0;i<total_counts_cc;i++) {
2440     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2441     if (size_of_constraint == 1) {
2442       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2443       pcbddc->local_primal_size_cc += 1;
2444     } else if (PetscBTLookup(change_basis,i)) {
2445       for (k=0;k<constraints_n[i];k++) {
2446         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2447       }
2448       pcbddc->local_primal_size_cc += constraints_n[i];
2449       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2450         PetscBTSet(qr_needed_idx,i);
2451         qr_needed = PETSC_TRUE;
2452       }
2453     } else {
2454       pcbddc->local_primal_size_cc += 1;
2455     }
2456   }
2457   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2458   pcbddc->n_vertices = total_primal_vertices;
2459   /* permute indices in order to have a sorted set of vertices */
2460   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2461 
2462   ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2463   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2464   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2465 
2466   /* nonzero structure of constraint matrix */
2467   /* and get reference dof for local constraints */
2468   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2469   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2470 
2471   j = total_primal_vertices;
2472   total_counts = total_primal_vertices;
2473   cum = total_primal_vertices;
2474   for (i=n_vertices;i<total_counts_cc;i++) {
2475     if (!PetscBTLookup(change_basis,i)) {
2476       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2477       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2478       cum++;
2479       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2480       for (k=0;k<constraints_n[i];k++) {
2481         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2482         nnz[j+k] = size_of_constraint;
2483       }
2484       j += constraints_n[i];
2485     }
2486   }
2487   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2488   ierr = PetscFree(nnz);CHKERRQ(ierr);
2489 
2490   /* set values in constraint matrix */
2491   for (i=0;i<total_primal_vertices;i++) {
2492     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2493   }
2494   total_counts = total_primal_vertices;
2495   for (i=n_vertices;i<total_counts_cc;i++) {
2496     if (!PetscBTLookup(change_basis,i)) {
2497       PetscInt *cols;
2498 
2499       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2500       cols = constraints_idxs+constraints_idxs_ptr[i];
2501       for (k=0;k<constraints_n[i];k++) {
2502         PetscInt    row = total_counts+k;
2503         PetscScalar *vals;
2504 
2505         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2506         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2507       }
2508       total_counts += constraints_n[i];
2509     }
2510   }
2511   /* assembling */
2512   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2513   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2514 
2515   /*
2516   ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2517   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2518   ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
2519   */
2520   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2521   if (pcbddc->use_change_of_basis) {
2522     /* dual and primal dofs on a single cc */
2523     PetscInt     dual_dofs,primal_dofs;
2524     /* working stuff for GEQRF */
2525     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2526     PetscBLASInt lqr_work;
2527     /* working stuff for UNGQR */
2528     PetscScalar  *gqr_work,lgqr_work_t;
2529     PetscBLASInt lgqr_work;
2530     /* working stuff for TRTRS */
2531     PetscScalar  *trs_rhs;
2532     PetscBLASInt Blas_NRHS;
2533     /* pointers for values insertion into change of basis matrix */
2534     PetscInt     *start_rows,*start_cols;
2535     PetscScalar  *start_vals;
2536     /* working stuff for values insertion */
2537     PetscBT      is_primal;
2538     PetscInt     *aux_primal_numbering_B;
2539     /* matrix sizes */
2540     PetscInt     global_size,local_size;
2541     /* temporary change of basis */
2542     Mat          localChangeOfBasisMatrix;
2543     /* extra space for debugging */
2544     PetscScalar  *dbg_work;
2545 
2546     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2547     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2548     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2549     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2550     /* nonzeros for local mat */
2551     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2552     for (i=0;i<pcis->n;i++) nnz[i]=1;
2553     for (i=n_vertices;i<total_counts_cc;i++) {
2554       if (PetscBTLookup(change_basis,i)) {
2555         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2556         if (PetscBTLookup(qr_needed_idx,i)) {
2557           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2558         } else {
2559           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2560           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2561         }
2562       }
2563     }
2564     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2565     ierr = PetscFree(nnz);CHKERRQ(ierr);
2566     /* Set initial identity in the matrix */
2567     for (i=0;i<pcis->n;i++) {
2568       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2569     }
2570 
2571     if (pcbddc->dbg_flag) {
2572       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2573       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2574     }
2575 
2576 
2577     /* Now we loop on the constraints which need a change of basis */
2578     /*
2579        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2580        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2581 
2582        Basic blocks of change of basis matrix T computed by
2583 
2584           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2585 
2586             | 1        0   ...        0         s_1/S |
2587             | 0        1   ...        0         s_2/S |
2588             |              ...                        |
2589             | 0        ...            1     s_{n-1}/S |
2590             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2591 
2592             with S = \sum_{i=1}^n s_i^2
2593             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2594                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2595 
2596           - QR decomposition of constraints otherwise
2597     */
2598     if (qr_needed) {
2599       /* space to store Q */
2600       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2601       /* first we issue queries for optimal work */
2602       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2603       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2604       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2605       lqr_work = -1;
2606       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2607       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2608       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2609       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2610       lgqr_work = -1;
2611       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2612       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2613       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2614       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2615       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2616       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2617       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2618       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2619       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2620       /* array to store scaling factors for reflectors */
2621       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2622       /* array to store rhs and solution of triangular solver */
2623       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2624       /* allocating workspace for check */
2625       if (pcbddc->dbg_flag) {
2626         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2627       }
2628     }
2629     /* array to store whether a node is primal or not */
2630     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2631     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2632     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2633     if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i);
2634     for (i=0;i<total_primal_vertices;i++) {
2635       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2636     }
2637     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2638 
2639     /* loop on constraints and see whether or not they need a change of basis and compute it */
2640     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2641       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2642       if (PetscBTLookup(change_basis,total_counts)) {
2643         /* get constraint info */
2644         primal_dofs = constraints_n[total_counts];
2645         dual_dofs = size_of_constraint-primal_dofs;
2646 
2647         if (pcbddc->dbg_flag) {
2648           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
2649         }
2650 
2651         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2652 
2653           /* copy quadrature constraints for change of basis check */
2654           if (pcbddc->dbg_flag) {
2655             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2656           }
2657           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2658           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2659 
2660           /* compute QR decomposition of constraints */
2661           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2662           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2663           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2664           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2665           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2666           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2667           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2668 
2669           /* explictly compute R^-T */
2670           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2671           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2672           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2673           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2674           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2675           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2676           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2677           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2678           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2679           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2680 
2681           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2682           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2683           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2684           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2685           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2686           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2687           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2688           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2689           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2690 
2691           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2692              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2693              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2694           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2695           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2696           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2697           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2698           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2699           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2700           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2701           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2702           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2703           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2704 
2705           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2706           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2707           /* insert cols for primal dofs */
2708           for (j=0;j<primal_dofs;j++) {
2709             start_vals = &qr_basis[j*size_of_constraint];
2710             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2711             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2712           }
2713           /* insert cols for dual dofs */
2714           for (j=0,k=0;j<dual_dofs;k++) {
2715             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2716               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2717               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2718               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2719               j++;
2720             }
2721           }
2722 
2723           /* check change of basis */
2724           if (pcbddc->dbg_flag) {
2725             PetscInt   ii,jj;
2726             PetscBool valid_qr=PETSC_TRUE;
2727             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2728             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2729             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2730             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2731             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2732             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2733             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2734             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2735             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2736             for (jj=0;jj<size_of_constraint;jj++) {
2737               for (ii=0;ii<primal_dofs;ii++) {
2738                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2739                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2740               }
2741             }
2742             if (!valid_qr) {
2743               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2744               for (jj=0;jj<size_of_constraint;jj++) {
2745                 for (ii=0;ii<primal_dofs;ii++) {
2746                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2747                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2748                   }
2749                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2750                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2751                   }
2752                 }
2753               }
2754             } else {
2755               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2756             }
2757           }
2758         } else { /* simple transformation block */
2759           PetscInt    row,col;
2760           PetscScalar val,norm;
2761 
2762           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2763           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2764           for (j=0;j<size_of_constraint;j++) {
2765             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2766             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2767             if (!PetscBTLookup(is_primal,row_B)) {
2768               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2769               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2770               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2771             } else {
2772               for (k=0;k<size_of_constraint;k++) {
2773                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2774                 if (row != col) {
2775                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2776                 } else {
2777                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2778                 }
2779                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2780               }
2781             }
2782           }
2783           if (pcbddc->dbg_flag) {
2784             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2785           }
2786         }
2787       } else {
2788         if (pcbddc->dbg_flag) {
2789           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
2790         }
2791       }
2792     }
2793 
2794     /* free workspace */
2795     if (qr_needed) {
2796       if (pcbddc->dbg_flag) {
2797         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2798       }
2799       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2800       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2801       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2802       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2803       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2804     }
2805     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2806     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2807     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2808 
2809     /* assembling of global change of variable */
2810     {
2811       Mat      tmat;
2812       PetscInt bs;
2813 
2814       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2815       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2816       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2817       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2818       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2819       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2820       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2821       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2822       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2823       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2824       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2825       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2826       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2827       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2828       ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2829       ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2830       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2831       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2832     }
2833     /* check */
2834     if (pcbddc->dbg_flag) {
2835       PetscReal error;
2836       Vec       x,x_change;
2837 
2838       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2839       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2840       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2841       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2842       ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2843       ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2844       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2845       ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2846       ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2847       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2848       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2849       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2850       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2851       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2852       ierr = VecDestroy(&x);CHKERRQ(ierr);
2853       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2854     }
2855 
2856     /* adapt sub_schurs computed (if any) */
2857     if (pcbddc->use_deluxe_scaling) {
2858       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2859       if (sub_schurs->S_Ej_all) {
2860         Mat S_new,tmat;
2861         IS is_all_N;
2862 
2863         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2864         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2865         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2866         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2867         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2868         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2869         sub_schurs->S_Ej_all = S_new;
2870         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2871         if (sub_schurs->sum_S_Ej_all) {
2872           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2873           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2874           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2875           sub_schurs->sum_S_Ej_all = S_new;
2876           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2877         }
2878         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2879       }
2880     }
2881     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2882   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2883     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2884     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2885   }
2886 
2887   /* set up change of basis context */
2888   if (pcbddc->ChangeOfBasisMatrix) {
2889     PCBDDCChange_ctx change_ctx;
2890 
2891     if (!pcbddc->new_global_mat) {
2892       PetscInt global_size,local_size;
2893 
2894       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2895       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2896       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2897       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2898       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2899       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2900       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2901       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2902       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2903     } else {
2904       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2905       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2906       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2907     }
2908     if (!pcbddc->user_ChangeOfBasisMatrix) {
2909       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2910       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2911     } else {
2912       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2913       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2914     }
2915     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2916     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2917     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2918     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2919   }
2920 
2921   /* check if a new primal space has been introduced */
2922   pcbddc->new_primal_space_local = PETSC_TRUE;
2923   if (olocal_primal_size == pcbddc->local_primal_size) {
2924     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2925     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2926     if (!pcbddc->new_primal_space_local) {
2927       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2928       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2929     }
2930   }
2931   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
2932   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2933   ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2934 
2935   /* flush dbg viewer */
2936   if (pcbddc->dbg_flag) {
2937     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2938   }
2939 
2940   /* free workspace */
2941   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2942   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2943   if (!pcbddc->adaptive_selection) {
2944     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
2945     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
2946   } else {
2947     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
2948                       pcbddc->adaptive_constraints_idxs_ptr,
2949                       pcbddc->adaptive_constraints_data_ptr,
2950                       pcbddc->adaptive_constraints_idxs,
2951                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2952     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
2953     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
2954   }
2955   PetscFunctionReturn(0);
2956 }
2957 
2958 #undef __FUNCT__
2959 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2960 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2961 {
2962   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2963   PC_IS       *pcis = (PC_IS*)pc->data;
2964   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2965   PetscInt    ierr,i,vertex_size,N;
2966   PetscViewer viewer=pcbddc->dbg_viewer;
2967 
2968   PetscFunctionBegin;
2969   /* Reset previously computed graph */
2970   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2971   /* Init local Graph struct */
2972   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
2973   ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr);
2974 
2975   /* Check validity of the csr graph passed in by the user */
2976   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2977     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2978   }
2979 
2980   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2981   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2982     PetscInt  *xadj,*adjncy;
2983     PetscInt  nvtxs;
2984     PetscBool flg_row=PETSC_FALSE;
2985 
2986     if (pcbddc->use_local_adj) {
2987 
2988       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2989       if (flg_row) {
2990         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2991         pcbddc->computed_rowadj = PETSC_TRUE;
2992       }
2993       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2994     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
2995       IS                     is_dummy;
2996       ISLocalToGlobalMapping l2gmap_dummy;
2997       PetscInt               j,sum;
2998       PetscInt               *cxadj,*cadjncy;
2999       const PetscInt         *idxs;
3000       PCBDDCGraph            graph;
3001       PetscBT                is_on_boundary;
3002 
3003       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3004       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3005       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3006       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3007       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3008       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3009       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3010       if (flg_row) {
3011         graph->xadj = xadj;
3012         graph->adjncy = adjncy;
3013       }
3014       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3015       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3016       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3017 
3018       if (pcbddc->dbg_flag) {
3019         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3020         for (i=0;i<graph->ncc;i++) {
3021           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3022         }
3023       }
3024 
3025       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3026       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3027       for (i=0;i<pcis->n_B;i++) {
3028         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3029       }
3030       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3031 
3032       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3033       sum = 0;
3034       for (i=0;i<graph->ncc;i++) {
3035         PetscInt sizecc = 0;
3036         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3037           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3038             sizecc++;
3039           }
3040         }
3041         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3042           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3043             cxadj[graph->queue[j]] = sizecc;
3044           }
3045         }
3046         sum += sizecc*sizecc;
3047       }
3048       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3049       sum = 0;
3050       for (i=0;i<pcis->n;i++) {
3051         PetscInt temp = cxadj[i];
3052         cxadj[i] = sum;
3053         sum += temp;
3054       }
3055       cxadj[pcis->n] = sum;
3056       for (i=0;i<graph->ncc;i++) {
3057         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3058           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3059             PetscInt k,sizecc = 0;
3060             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3061               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3062                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3063                 sizecc++;
3064               }
3065             }
3066           }
3067         }
3068       }
3069       if (sum) {
3070         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3071       } else {
3072         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3073         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3074       }
3075       graph->xadj = 0;
3076       graph->adjncy = 0;
3077       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3078       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3079     }
3080   }
3081   if (pcbddc->dbg_flag) {
3082     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3083   }
3084 
3085   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3086   vertex_size = 1;
3087   if (pcbddc->user_provided_isfordofs) {
3088     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3089       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3090       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3091         ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3092         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3093       }
3094       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3095       pcbddc->n_ISForDofs = 0;
3096       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3097     }
3098     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3099     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3100   } else {
3101     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3102       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3103       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3104       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3105         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3106       }
3107     }
3108   }
3109 
3110   /* Setup of Graph */
3111   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3112     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3113   }
3114   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3115     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3116   }
3117   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3118 
3119   /* Graph's connected components analysis */
3120   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3121 
3122   /* print some info to stdout */
3123   if (pcbddc->dbg_flag) {
3124     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3125   }
3126 
3127   /* mark topography has done */
3128   pcbddc->recompute_topography = PETSC_FALSE;
3129   PetscFunctionReturn(0);
3130 }
3131 
3132 #undef __FUNCT__
3133 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3134 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3135 {
3136   PetscInt       i,j;
3137   PetscScalar    *alphas;
3138   PetscErrorCode ierr;
3139 
3140   PetscFunctionBegin;
3141   /* this implements stabilized Gram-Schmidt */
3142   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3143   for (i=0;i<n;i++) {
3144     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3145     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3146     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3147   }
3148   ierr = PetscFree(alphas);CHKERRQ(ierr);
3149   PetscFunctionReturn(0);
3150 }
3151 
3152 #undef __FUNCT__
3153 #define __FUNCT__ "MatISGetSubassemblingPattern"
3154 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
3155 {
3156   IS             ranks_send_to;
3157   PetscInt       n_neighs,*neighs,*n_shared,**shared;
3158   PetscMPIInt    size,rank,color;
3159   PetscInt       *xadj,*adjncy;
3160   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
3161   PetscInt       i,local_size,threshold=0;
3162   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3163   PetscSubcomm   subcomm;
3164   PetscErrorCode ierr;
3165 
3166   PetscFunctionBegin;
3167   ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3168   ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3169   ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3170 
3171   /* Get info on mapping */
3172   ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr);
3173   ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3174 
3175   /* build local CSR graph of subdomains' connectivity */
3176   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3177   xadj[0] = 0;
3178   xadj[1] = PetscMax(n_neighs-1,0);
3179   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3180   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3181 
3182   if (threshold) {
3183     PetscInt xadj_count = 0;
3184     for (i=1;i<n_neighs;i++) {
3185       if (n_shared[i] > threshold) {
3186         adjncy[xadj_count] = neighs[i];
3187         adjncy_wgt[xadj_count] = n_shared[i];
3188         xadj_count++;
3189       }
3190     }
3191     xadj[1] = xadj_count;
3192   } else {
3193     if (xadj[1]) {
3194       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3195       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3196     }
3197   }
3198   ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3199   if (use_square) {
3200     for (i=0;i<xadj[1];i++) {
3201       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3202     }
3203   }
3204   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3205 
3206   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3207 
3208   /*
3209     Restrict work on active processes only.
3210   */
3211   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3212   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3213   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3214   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3215   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3216   if (color) {
3217     ierr = PetscFree(xadj);CHKERRQ(ierr);
3218     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3219     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3220   } else {
3221     Mat             subdomain_adj;
3222     IS              new_ranks,new_ranks_contig;
3223     MatPartitioning partitioner;
3224     PetscInt        prank,rstart=0,rend=0;
3225     PetscInt        *is_indices,*oldranks;
3226     PetscBool       aggregate;
3227 
3228     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3229     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3230     prank = rank;
3231     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3232     /*
3233     for (i=0;i<size;i++) {
3234       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3235     }
3236     */
3237     for (i=0;i<xadj[1];i++) {
3238       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3239     }
3240     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3241     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
3242     if (aggregate) {
3243       PetscInt    lrows,row,ncols,*cols;
3244       PetscMPIInt nrank;
3245       PetscScalar *vals;
3246 
3247       ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr);
3248       lrows = 0;
3249       if (nrank<redprocs) {
3250         lrows = size/redprocs;
3251         if (nrank<size%redprocs) lrows++;
3252       }
3253       ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr);
3254       ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr);
3255       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3256       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3257       row = nrank;
3258       ncols = xadj[1]-xadj[0];
3259       cols = adjncy;
3260       ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr);
3261       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
3262       ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3263       ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3264       ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3265       ierr = PetscFree(xadj);CHKERRQ(ierr);
3266       ierr = PetscFree(adjncy);CHKERRQ(ierr);
3267       ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3268       ierr = PetscFree(vals);CHKERRQ(ierr);
3269     } else {
3270       ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3271     }
3272     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3273 
3274     /* Partition */
3275     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3276     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3277     if (use_vwgt) {
3278       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3279       v_wgt[0] = local_size;
3280       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3281     }
3282     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3283     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3284     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3285     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3286     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3287 
3288     /* renumber new_ranks to avoid "holes" in new set of processors */
3289     ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr);
3290     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3291     ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3292     if (!redprocs) {
3293       ranks_send_to_idx[0] = oldranks[is_indices[0]];
3294     } else {
3295       PetscInt    idxs[1];
3296       PetscMPIInt tag;
3297       MPI_Request *reqs;
3298 
3299       ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr);
3300       ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr);
3301       for (i=rstart;i<rend;i++) {
3302         ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr);
3303       }
3304       ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr);
3305       ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3306       ierr = PetscFree(reqs);CHKERRQ(ierr);
3307       ranks_send_to_idx[0] = oldranks[idxs[0]];
3308     }
3309     ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3310     /* clean up */
3311     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3312     ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr);
3313     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3314     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3315   }
3316   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3317 
3318   /* assemble parallel IS for sends */
3319   i = 1;
3320   if (color) i=0;
3321   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3322   /* get back IS */
3323   *is_sends = ranks_send_to;
3324   PetscFunctionReturn(0);
3325 }
3326 
3327 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3328 
3329 #undef __FUNCT__
3330 #define __FUNCT__ "MatISSubassemble"
3331 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3332 {
3333   Mat                    local_mat;
3334   IS                     is_sends_internal;
3335   PetscInt               rows,cols,new_local_rows;
3336   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3337   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3338   ISLocalToGlobalMapping l2gmap;
3339   PetscInt*              l2gmap_indices;
3340   const PetscInt*        is_indices;
3341   MatType                new_local_type;
3342   /* buffers */
3343   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3344   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3345   PetscInt               *recv_buffer_idxs_local;
3346   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3347   /* MPI */
3348   MPI_Comm               comm,comm_n;
3349   PetscSubcomm           subcomm;
3350   PetscMPIInt            n_sends,n_recvs,commsize;
3351   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3352   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3353   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3354   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3355   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3356   PetscErrorCode         ierr;
3357 
3358   PetscFunctionBegin;
3359   /* TODO: add missing checks */
3360   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3361   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3362   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3363   PetscValidLogicalCollectiveInt(mat,nis,7);
3364   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3365   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3366   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3367   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3368   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3369   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3370   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3371   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3372     PetscInt mrows,mcols,mnrows,mncols;
3373     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3374     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3375     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3376     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3377     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3378     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3379   }
3380   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3381   PetscValidLogicalCollectiveInt(mat,bs,0);
3382   /* prepare IS for sending if not provided */
3383   if (!is_sends) {
3384     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3385     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr);
3386   } else {
3387     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3388     is_sends_internal = is_sends;
3389   }
3390 
3391   /* get comm */
3392   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3393 
3394   /* compute number of sends */
3395   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3396   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3397 
3398   /* compute number of receives */
3399   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3400   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3401   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3402   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3403   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3404   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3405   ierr = PetscFree(iflags);CHKERRQ(ierr);
3406 
3407   /* restrict comm if requested */
3408   subcomm = 0;
3409   destroy_mat = PETSC_FALSE;
3410   if (restrict_comm) {
3411     PetscMPIInt color,subcommsize;
3412 
3413     color = 0;
3414     if (restrict_full) {
3415       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
3416     } else {
3417       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
3418     }
3419     ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3420     subcommsize = commsize - subcommsize;
3421     /* check if reuse has been requested */
3422     if (reuse == MAT_REUSE_MATRIX) {
3423       if (*mat_n) {
3424         PetscMPIInt subcommsize2;
3425         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3426         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3427         comm_n = PetscObjectComm((PetscObject)*mat_n);
3428       } else {
3429         comm_n = PETSC_COMM_SELF;
3430       }
3431     } else { /* MAT_INITIAL_MATRIX */
3432       PetscMPIInt rank;
3433 
3434       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3435       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3436       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3437       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3438       comm_n = PetscSubcommChild(subcomm);
3439     }
3440     /* flag to destroy *mat_n if not significative */
3441     if (color) destroy_mat = PETSC_TRUE;
3442   } else {
3443     comm_n = comm;
3444   }
3445 
3446   /* prepare send/receive buffers */
3447   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3448   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3449   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3450   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3451   if (nis) {
3452     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3453   }
3454 
3455   /* Get data from local matrices */
3456   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3457     /* TODO: See below some guidelines on how to prepare the local buffers */
3458     /*
3459        send_buffer_vals should contain the raw values of the local matrix
3460        send_buffer_idxs should contain:
3461        - MatType_PRIVATE type
3462        - PetscInt        size_of_l2gmap
3463        - PetscInt        global_row_indices[size_of_l2gmap]
3464        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3465     */
3466   else {
3467     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3468     ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr);
3469     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3470     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3471     send_buffer_idxs[1] = i;
3472     ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3473     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3474     ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3475     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3476     for (i=0;i<n_sends;i++) {
3477       ilengths_vals[is_indices[i]] = len*len;
3478       ilengths_idxs[is_indices[i]] = len+2;
3479     }
3480   }
3481   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3482   /* additional is (if any) */
3483   if (nis) {
3484     PetscMPIInt psum;
3485     PetscInt j;
3486     for (j=0,psum=0;j<nis;j++) {
3487       PetscInt plen;
3488       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3489       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3490       psum += len+1; /* indices + lenght */
3491     }
3492     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3493     for (j=0,psum=0;j<nis;j++) {
3494       PetscInt plen;
3495       const PetscInt *is_array_idxs;
3496       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3497       send_buffer_idxs_is[psum] = plen;
3498       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3499       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3500       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3501       psum += plen+1; /* indices + lenght */
3502     }
3503     for (i=0;i<n_sends;i++) {
3504       ilengths_idxs_is[is_indices[i]] = psum;
3505     }
3506     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3507   }
3508 
3509   buf_size_idxs = 0;
3510   buf_size_vals = 0;
3511   buf_size_idxs_is = 0;
3512   for (i=0;i<n_recvs;i++) {
3513     buf_size_idxs += (PetscInt)olengths_idxs[i];
3514     buf_size_vals += (PetscInt)olengths_vals[i];
3515     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3516   }
3517   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3518   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3519   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3520 
3521   /* get new tags for clean communications */
3522   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3523   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3524   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3525 
3526   /* allocate for requests */
3527   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3528   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3529   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3530   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3531   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3532   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3533 
3534   /* communications */
3535   ptr_idxs = recv_buffer_idxs;
3536   ptr_vals = recv_buffer_vals;
3537   ptr_idxs_is = recv_buffer_idxs_is;
3538   for (i=0;i<n_recvs;i++) {
3539     source_dest = onodes[i];
3540     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3541     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3542     ptr_idxs += olengths_idxs[i];
3543     ptr_vals += olengths_vals[i];
3544     if (nis) {
3545       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3546       ptr_idxs_is += olengths_idxs_is[i];
3547     }
3548   }
3549   for (i=0;i<n_sends;i++) {
3550     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3551     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3552     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3553     if (nis) {
3554       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3555     }
3556   }
3557   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3558   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3559 
3560   /* assemble new l2g map */
3561   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3562   ptr_idxs = recv_buffer_idxs;
3563   new_local_rows = 0;
3564   for (i=0;i<n_recvs;i++) {
3565     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3566     ptr_idxs += olengths_idxs[i];
3567   }
3568   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3569   ptr_idxs = recv_buffer_idxs;
3570   new_local_rows = 0;
3571   for (i=0;i<n_recvs;i++) {
3572     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3573     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3574     ptr_idxs += olengths_idxs[i];
3575   }
3576   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3577   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3578   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3579 
3580   /* infer new local matrix type from received local matrices type */
3581   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3582   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3583   if (n_recvs) {
3584     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3585     ptr_idxs = recv_buffer_idxs;
3586     for (i=0;i<n_recvs;i++) {
3587       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3588         new_local_type_private = MATAIJ_PRIVATE;
3589         break;
3590       }
3591       ptr_idxs += olengths_idxs[i];
3592     }
3593     switch (new_local_type_private) {
3594       case MATDENSE_PRIVATE:
3595         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3596           new_local_type = MATSEQAIJ;
3597           bs = 1;
3598         } else { /* if I receive only 1 dense matrix */
3599           new_local_type = MATSEQDENSE;
3600           bs = 1;
3601         }
3602         break;
3603       case MATAIJ_PRIVATE:
3604         new_local_type = MATSEQAIJ;
3605         bs = 1;
3606         break;
3607       case MATBAIJ_PRIVATE:
3608         new_local_type = MATSEQBAIJ;
3609         break;
3610       case MATSBAIJ_PRIVATE:
3611         new_local_type = MATSEQSBAIJ;
3612         break;
3613       default:
3614         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3615         break;
3616     }
3617   } else { /* by default, new_local_type is seqdense */
3618     new_local_type = MATSEQDENSE;
3619     bs = 1;
3620   }
3621 
3622   /* create MATIS object if needed */
3623   if (reuse == MAT_INITIAL_MATRIX) {
3624     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3625     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr);
3626   } else {
3627     /* it also destroys the local matrices */
3628     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3629   }
3630   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3631   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3632 
3633   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3634 
3635   /* Global to local map of received indices */
3636   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3637   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3638   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3639 
3640   /* restore attributes -> type of incoming data and its size */
3641   buf_size_idxs = 0;
3642   for (i=0;i<n_recvs;i++) {
3643     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3644     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3645     buf_size_idxs += (PetscInt)olengths_idxs[i];
3646   }
3647   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3648 
3649   /* set preallocation */
3650   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3651   if (!newisdense) {
3652     PetscInt *new_local_nnz=0;
3653 
3654     ptr_idxs = recv_buffer_idxs_local;
3655     if (n_recvs) {
3656       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3657     }
3658     for (i=0;i<n_recvs;i++) {
3659       PetscInt j;
3660       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3661         for (j=0;j<*(ptr_idxs+1);j++) {
3662           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3663         }
3664       } else {
3665         /* TODO */
3666       }
3667       ptr_idxs += olengths_idxs[i];
3668     }
3669     if (new_local_nnz) {
3670       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3671       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3672       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3673       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3674       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3675       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3676     } else {
3677       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3678     }
3679     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3680   } else {
3681     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3682   }
3683 
3684   /* set values */
3685   ptr_vals = recv_buffer_vals;
3686   ptr_idxs = recv_buffer_idxs_local;
3687   for (i=0;i<n_recvs;i++) {
3688     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3689       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3690       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3691       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3692       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3693       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3694     } else {
3695       /* TODO */
3696     }
3697     ptr_idxs += olengths_idxs[i];
3698     ptr_vals += olengths_vals[i];
3699   }
3700   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3701   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3702   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3703   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3704   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3705   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3706 
3707 #if 0
3708   if (!restrict_comm) { /* check */
3709     Vec       lvec,rvec;
3710     PetscReal infty_error;
3711 
3712     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3713     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3714     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3715     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3716     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3717     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3718     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3719     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3720     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3721   }
3722 #endif
3723 
3724   /* assemble new additional is (if any) */
3725   if (nis) {
3726     PetscInt **temp_idxs,*count_is,j,psum;
3727 
3728     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3729     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3730     ptr_idxs = recv_buffer_idxs_is;
3731     psum = 0;
3732     for (i=0;i<n_recvs;i++) {
3733       for (j=0;j<nis;j++) {
3734         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3735         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3736         psum += plen;
3737         ptr_idxs += plen+1; /* shift pointer to received data */
3738       }
3739     }
3740     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3741     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3742     for (i=1;i<nis;i++) {
3743       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3744     }
3745     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3746     ptr_idxs = recv_buffer_idxs_is;
3747     for (i=0;i<n_recvs;i++) {
3748       for (j=0;j<nis;j++) {
3749         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3750         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3751         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3752         ptr_idxs += plen+1; /* shift pointer to received data */
3753       }
3754     }
3755     for (i=0;i<nis;i++) {
3756       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3757       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3758       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3759     }
3760     ierr = PetscFree(count_is);CHKERRQ(ierr);
3761     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3762     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3763   }
3764   /* free workspace */
3765   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3766   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3767   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3768   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3769   if (isdense) {
3770     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3771     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3772   } else {
3773     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3774   }
3775   if (nis) {
3776     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3777     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3778   }
3779   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3780   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3781   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3782   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3783   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3784   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3785   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3786   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3787   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3788   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3789   ierr = PetscFree(onodes);CHKERRQ(ierr);
3790   if (nis) {
3791     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3792     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3793     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3794   }
3795   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3796   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3797     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3798     for (i=0;i<nis;i++) {
3799       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3800     }
3801     *mat_n = NULL;
3802   }
3803   PetscFunctionReturn(0);
3804 }
3805 
3806 /* temporary hack into ksp private data structure */
3807 #include <petsc/private/kspimpl.h>
3808 
3809 #undef __FUNCT__
3810 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3811 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3812 {
3813   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3814   PC_IS                  *pcis = (PC_IS*)pc->data;
3815   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3816   ISLocalToGlobalMapping coarse_islg;
3817   IS                     coarse_is,*isarray;
3818   PetscInt               i,im_active=-1,active_procs=-1;
3819   PetscInt               nis,nisdofs,nisneu;
3820   PC                     pc_temp;
3821   PCType                 coarse_pc_type;
3822   KSPType                coarse_ksp_type;
3823   PetscBool              multilevel_requested,multilevel_allowed;
3824   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3825   Mat                    t_coarse_mat_is;
3826   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3827   PetscMPIInt            all_procs;
3828   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3829   PetscBool              compute_vecs = PETSC_FALSE;
3830   PetscScalar            *array;
3831   PetscErrorCode         ierr;
3832 
3833   PetscFunctionBegin;
3834   /* Assign global numbering to coarse dofs */
3835   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3836     PetscInt ocoarse_size;
3837     compute_vecs = PETSC_TRUE;
3838     ocoarse_size = pcbddc->coarse_size;
3839     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3840     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3841     /* see if we can avoid some work */
3842     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3843       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
3844       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
3845         PC        pc;
3846         PetscBool isbddc;
3847 
3848         /* temporary workaround since PCBDDC does not have a reset method so far */
3849         ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr);
3850         ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr);
3851         if (isbddc) {
3852           ierr = PCDestroy(&pc);CHKERRQ(ierr);
3853         }
3854         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3855         coarse_reuse = PETSC_FALSE;
3856       } else { /* we can safely reuse already computed coarse matrix */
3857         coarse_reuse = PETSC_TRUE;
3858       }
3859     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3860       coarse_reuse = PETSC_FALSE;
3861     }
3862     /* reset any subassembling information */
3863     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3864     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3865   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3866     coarse_reuse = PETSC_TRUE;
3867   }
3868 
3869   /* count "active" (i.e. with positive local size) and "void" processes */
3870   im_active = !!(pcis->n);
3871   ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3872   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3873   void_procs = all_procs-active_procs;
3874   csin_type_simple = PETSC_TRUE;
3875   redist = PETSC_FALSE;
3876   if (pcbddc->current_level && void_procs) {
3877     csin_ml = PETSC_TRUE;
3878     ncoarse_ml = void_procs;
3879     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3880     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3881       csin_ds = PETSC_TRUE;
3882       ncoarse_ds = pcbddc->redistribute_coarse;
3883       redist = PETSC_TRUE;
3884     } else {
3885       csin_ds = PETSC_TRUE;
3886       ncoarse_ds = active_procs;
3887       redist = PETSC_TRUE;
3888     }
3889   } else {
3890     csin_ml = PETSC_FALSE;
3891     ncoarse_ml = all_procs;
3892     if (void_procs) {
3893       csin_ds = PETSC_TRUE;
3894       ncoarse_ds = void_procs;
3895       csin_type_simple = PETSC_FALSE;
3896     } else {
3897       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3898         csin_ds = PETSC_TRUE;
3899         ncoarse_ds = pcbddc->redistribute_coarse;
3900         redist = PETSC_TRUE;
3901       } else {
3902         csin_ds = PETSC_FALSE;
3903         ncoarse_ds = all_procs;
3904       }
3905     }
3906   }
3907 
3908   /*
3909     test if we can go multilevel: three conditions must be satisfied:
3910     - we have not exceeded the number of levels requested
3911     - we can actually subassemble the active processes
3912     - we can find a suitable number of MPI processes where we can place the subassembled problem
3913   */
3914   multilevel_allowed = PETSC_FALSE;
3915   multilevel_requested = PETSC_FALSE;
3916   if (pcbddc->current_level < pcbddc->max_levels) {
3917     multilevel_requested = PETSC_TRUE;
3918     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3919       multilevel_allowed = PETSC_FALSE;
3920     } else {
3921       multilevel_allowed = PETSC_TRUE;
3922     }
3923   }
3924   /* determine number of process partecipating to coarse solver */
3925   if (multilevel_allowed) {
3926     ncoarse = ncoarse_ml;
3927     csin = csin_ml;
3928     redist = PETSC_FALSE;
3929   } else {
3930     ncoarse = ncoarse_ds;
3931     csin = csin_ds;
3932   }
3933 
3934   /* creates temporary l2gmap and IS for coarse indexes */
3935   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3936   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3937 
3938   /* creates temporary MATIS object for coarse matrix */
3939   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3940   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3941   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3942   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3943 #if 0
3944   {
3945     PetscViewer viewer;
3946     char filename[256];
3947     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3948     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3949     ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3950     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3951     ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
3952     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3953   }
3954 #endif
3955   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr);
3956   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3957   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3958   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3959   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3960 
3961   /* compute dofs splitting and neumann boundaries for coarse dofs */
3962   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3963     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3964     const PetscInt         *idxs;
3965     ISLocalToGlobalMapping tmap;
3966 
3967     /* create map between primal indices (in local representative ordering) and local primal numbering */
3968     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3969     /* allocate space for temporary storage */
3970     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3971     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3972     /* allocate for IS array */
3973     nisdofs = pcbddc->n_ISForDofsLocal;
3974     nisneu = !!pcbddc->NeumannBoundariesLocal;
3975     nis = nisdofs + nisneu;
3976     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3977     /* dofs splitting */
3978     for (i=0;i<nisdofs;i++) {
3979       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3980       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3981       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3982       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3983       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3984       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3985       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3986       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3987     }
3988     /* neumann boundaries */
3989     if (pcbddc->NeumannBoundariesLocal) {
3990       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3991       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3992       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3993       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3994       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3995       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3996       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3997       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3998     }
3999     /* free memory */
4000     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4001     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4002     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4003   } else {
4004     nis = 0;
4005     nisdofs = 0;
4006     nisneu = 0;
4007     isarray = NULL;
4008   }
4009   /* destroy no longer needed map */
4010   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4011 
4012   /* restrict on coarse candidates (if needed) */
4013   coarse_mat_is = NULL;
4014   if (csin) {
4015     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4016       if (redist) {
4017         PetscMPIInt rank;
4018         PetscInt    spc,n_spc_p1,dest[1],destsize;
4019 
4020         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4021         spc = active_procs/ncoarse;
4022         n_spc_p1 = active_procs%ncoarse;
4023         if (im_active) {
4024           destsize = 1;
4025           if (rank > n_spc_p1*(spc+1)-1) {
4026             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4027           } else {
4028             dest[0] = rank/(spc+1);
4029           }
4030         } else {
4031           destsize = 0;
4032         }
4033         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4034       } else if (csin_type_simple) {
4035         PetscMPIInt rank;
4036         PetscInt    issize,isidx;
4037 
4038         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4039         if (im_active) {
4040           issize = 1;
4041           isidx = (PetscInt)rank;
4042         } else {
4043           issize = 0;
4044           isidx = -1;
4045         }
4046         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4047       } else { /* get a suitable subassembling pattern from MATIS code */
4048         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4049       }
4050 
4051       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4052       if (!redist || ncoarse <= void_procs) {
4053         PetscInt ncoarse_cand,tissize,*nisindices;
4054         PetscInt *coarse_candidates;
4055         const PetscInt* tisindices;
4056 
4057         /* get coarse candidates' ranks in pc communicator */
4058         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4059         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4060         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4061           if (!coarse_candidates[i]) {
4062             coarse_candidates[ncoarse_cand++]=i;
4063           }
4064         }
4065         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4066 
4067 
4068         if (pcbddc->dbg_flag) {
4069           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4070           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4071           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4072           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4073           for (i=0;i<ncoarse_cand;i++) {
4074             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4075           }
4076           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4077           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4078         }
4079         /* shift the pattern on coarse candidates */
4080         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4081         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4082         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4083         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4084         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4085         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4086         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4087       }
4088       if (pcbddc->dbg_flag) {
4089         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4090         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4091         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4092         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4093       }
4094     }
4095     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4096     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
4097       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4098     } else { /* this is the last level, so use just receiving processes in subcomm */
4099       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4100     }
4101   } else {
4102     if (pcbddc->dbg_flag) {
4103       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4104       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4105       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4106     }
4107     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4108     coarse_mat_is = t_coarse_mat_is;
4109   }
4110 
4111   /* create local to global scatters for coarse problem */
4112   if (compute_vecs) {
4113     PetscInt lrows;
4114     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4115     if (coarse_mat_is) {
4116       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4117     } else {
4118       lrows = 0;
4119     }
4120     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4121     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4122     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4123     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4124     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4125   }
4126   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4127   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4128 
4129   /* set defaults for coarse KSP and PC */
4130   if (multilevel_allowed) {
4131     coarse_ksp_type = KSPRICHARDSON;
4132     coarse_pc_type = PCBDDC;
4133   } else {
4134     coarse_ksp_type = KSPPREONLY;
4135     coarse_pc_type = PCREDUNDANT;
4136   }
4137 
4138   /* print some info if requested */
4139   if (pcbddc->dbg_flag) {
4140     if (!multilevel_allowed) {
4141       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4142       if (multilevel_requested) {
4143         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4144       } else if (pcbddc->max_levels) {
4145         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4146       }
4147       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4148     }
4149   }
4150 
4151   /* create the coarse KSP object only once with defaults */
4152   if (coarse_mat_is) {
4153     MatReuse coarse_mat_reuse;
4154     PetscViewer dbg_viewer = NULL;
4155     if (pcbddc->dbg_flag) {
4156       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4157       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4158     }
4159     if (!pcbddc->coarse_ksp) {
4160       char prefix[256],str_level[16];
4161       size_t len;
4162       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4163       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4164       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4165       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4166       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4167       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4168       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4169       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4170       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4171       /* prefix */
4172       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4173       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4174       if (!pcbddc->current_level) {
4175         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4176         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4177       } else {
4178         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4179         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4180         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4181         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4182         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4183         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4184       }
4185       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4186       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4187       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4188       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4189       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4190       /* allow user customization */
4191       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4192     }
4193     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4194     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4195     if (nisdofs) {
4196       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4197       for (i=0;i<nisdofs;i++) {
4198         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4199       }
4200     }
4201     if (nisneu) {
4202       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4203       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4204     }
4205 
4206     /* get some info after set from options */
4207     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4208     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4209     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4210     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4211       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4212       isbddc = PETSC_FALSE;
4213     }
4214     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4215     if (isredundant) {
4216       KSP inner_ksp;
4217       PC  inner_pc;
4218       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4219       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4220       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4221     }
4222 
4223     /* assemble coarse matrix */
4224     if (coarse_reuse) {
4225       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4226       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4227       coarse_mat_reuse = MAT_REUSE_MATRIX;
4228     } else {
4229       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4230     }
4231     if (isbddc || isnn) {
4232       if (pcbddc->coarsening_ratio > 1) {
4233         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4234           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4235           if (pcbddc->dbg_flag) {
4236             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4237             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4238             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4239             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4240           }
4241         }
4242         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4243       } else {
4244         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4245         coarse_mat = coarse_mat_is;
4246       }
4247     } else {
4248       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4249     }
4250     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4251 
4252     /* propagate symmetry info of coarse matrix */
4253     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4254     if (pc->pmat->symmetric_set) {
4255       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
4256     }
4257     if (pc->pmat->hermitian_set) {
4258       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
4259     }
4260     if (pc->pmat->spd_set) {
4261       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
4262     }
4263     /* set operators */
4264     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4265     if (pcbddc->dbg_flag) {
4266       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4267     }
4268   } else { /* processes non partecipating to coarse solver (if any) */
4269     coarse_mat = 0;
4270   }
4271   ierr = PetscFree(isarray);CHKERRQ(ierr);
4272 #if 0
4273   {
4274     PetscViewer viewer;
4275     char filename[256];
4276     sprintf(filename,"coarse_mat.m");
4277     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4278     ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4279     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4280     ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
4281     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4282   }
4283 #endif
4284 
4285   if (pcbddc->coarse_ksp) {
4286     Vec crhs,csol;
4287     PetscBool ispreonly;
4288 
4289     /* setup coarse ksp */
4290     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4291     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4292     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4293     /* hack */
4294     if (!csol) {
4295       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4296     }
4297     if (!crhs) {
4298       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4299     }
4300     /* Check coarse problem if in debug mode or if solving with an iterative method */
4301     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4302     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4303       KSP       check_ksp;
4304       KSPType   check_ksp_type;
4305       PC        check_pc;
4306       Vec       check_vec,coarse_vec;
4307       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4308       PetscInt  its;
4309       PetscBool compute_eigs;
4310       PetscReal *eigs_r,*eigs_c;
4311       PetscInt  neigs;
4312       const char *prefix;
4313 
4314       /* Create ksp object suitable for estimation of extreme eigenvalues */
4315       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4316       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4317       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4318       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4319       if (ispreonly) {
4320         check_ksp_type = KSPPREONLY;
4321         compute_eigs = PETSC_FALSE;
4322       } else {
4323         check_ksp_type = KSPGMRES;
4324         compute_eigs = PETSC_TRUE;
4325       }
4326       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4327       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4328       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4329       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4330       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4331       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4332       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4333       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4334       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4335       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4336       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4337       /* create random vec */
4338       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4339       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4340       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4341       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4342       /* solve coarse problem */
4343       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4344       /* set eigenvalue estimation if preonly has not been requested */
4345       if (compute_eigs) {
4346         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4347         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4348         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4349         lambda_max = eigs_r[neigs-1];
4350         lambda_min = eigs_r[0];
4351         if (pcbddc->use_coarse_estimates) {
4352           if (lambda_max>lambda_min) {
4353             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4354             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4355           }
4356         }
4357       }
4358 
4359       /* check coarse problem residual error */
4360       if (pcbddc->dbg_flag) {
4361         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4362         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4363         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4364         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4365         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4366         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4367         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4368         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4369         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4370         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4371         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4372         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4373         if (compute_eigs) {
4374           PetscReal lambda_max_s,lambda_min_s;
4375           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4376           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4377           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4378           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4379           for (i=0;i<neigs;i++) {
4380             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4381           }
4382         }
4383         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4384         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4385       }
4386       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4387       if (compute_eigs) {
4388         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4389         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4390       }
4391     }
4392   }
4393   /* print additional info */
4394   if (pcbddc->dbg_flag) {
4395     /* waits until all processes reaches this point */
4396     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4397     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4398     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4399   }
4400 
4401   /* free memory */
4402   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4403   PetscFunctionReturn(0);
4404 }
4405 
4406 #undef __FUNCT__
4407 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4408 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4409 {
4410   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4411   PC_IS*         pcis = (PC_IS*)pc->data;
4412   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4413   IS             subset,subset_mult,subset_n;
4414   PetscInt       local_size,coarse_size=0;
4415   PetscInt       *local_primal_indices=NULL;
4416   const PetscInt *t_local_primal_indices;
4417   PetscErrorCode ierr;
4418 
4419   PetscFunctionBegin;
4420   /* Compute global number of coarse dofs */
4421   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4422   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
4423   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr);
4424   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
4425   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr);
4426   ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr);
4427   ierr = ISDestroy(&subset);CHKERRQ(ierr);
4428   ierr = ISDestroy(&subset_mult);CHKERRQ(ierr);
4429   ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr);
4430   if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size);
4431   ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr);
4432   ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
4433   ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr);
4434   ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
4435   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
4436 
4437   /* check numbering */
4438   if (pcbddc->dbg_flag) {
4439     PetscScalar coarsesum,*array;
4440     PetscInt    i;
4441     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4442 
4443     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4444     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4445     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4446     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
4447     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4448     for (i=0;i<pcbddc->local_primal_size;i++) {
4449       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4450     }
4451     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4452     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4453     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4454     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4455     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4456     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4457     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4458     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4459     for (i=0;i<pcis->n;i++) {
4460       if (array[i] == 1.0) {
4461         set_error = PETSC_TRUE;
4462         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4463       }
4464     }
4465     ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4466     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4467     for (i=0;i<pcis->n;i++) {
4468       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4469     }
4470     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4471     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4472     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4473     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4474     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4475     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4476     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4477       PetscInt *gidxs;
4478 
4479       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4480       ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4481       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4482       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4483       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4484       for (i=0;i<pcbddc->local_primal_size;i++) {
4485         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4486       }
4487       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4488       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4489     }
4490     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4491     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
4492     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4493   }
4494   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4495   /* get back data */
4496   *coarse_size_n = coarse_size;
4497   *local_primal_indices_n = local_primal_indices;
4498   PetscFunctionReturn(0);
4499 }
4500 
4501 #undef __FUNCT__
4502 #define __FUNCT__ "PCBDDCGlobalToLocal"
4503 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4504 {
4505   IS             localis_t;
4506   PetscInt       i,lsize,*idxs,n;
4507   PetscScalar    *vals;
4508   PetscErrorCode ierr;
4509 
4510   PetscFunctionBegin;
4511   /* get indices in local ordering exploiting local to global map */
4512   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4513   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4514   for (i=0;i<lsize;i++) vals[i] = 1.0;
4515   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4516   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4517   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4518   if (idxs) { /* multilevel guard */
4519     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4520   }
4521   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4522   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4523   ierr = PetscFree(vals);CHKERRQ(ierr);
4524   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4525   /* now compute set in local ordering */
4526   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4527   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4528   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4529   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4530   for (i=0,lsize=0;i<n;i++) {
4531     if (PetscRealPart(vals[i]) > 0.5) {
4532       lsize++;
4533     }
4534   }
4535   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4536   for (i=0,lsize=0;i<n;i++) {
4537     if (PetscRealPart(vals[i]) > 0.5) {
4538       idxs[lsize++] = i;
4539     }
4540   }
4541   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4542   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4543   *localis = localis_t;
4544   PetscFunctionReturn(0);
4545 }
4546 
4547 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4548 #undef __FUNCT__
4549 #define __FUNCT__ "PCBDDCMatMult_Private"
4550 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4551 {
4552   PCBDDCChange_ctx change_ctx;
4553   PetscErrorCode   ierr;
4554 
4555   PetscFunctionBegin;
4556   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4557   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4558   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4559   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4560   PetscFunctionReturn(0);
4561 }
4562 
4563 #undef __FUNCT__
4564 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4565 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4566 {
4567   PCBDDCChange_ctx change_ctx;
4568   PetscErrorCode   ierr;
4569 
4570   PetscFunctionBegin;
4571   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4572   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4573   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4574   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4575   PetscFunctionReturn(0);
4576 }
4577 
4578 #undef __FUNCT__
4579 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4580 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4581 {
4582   PC_IS               *pcis=(PC_IS*)pc->data;
4583   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4584   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4585   Mat                 S_j;
4586   PetscInt            *used_xadj,*used_adjncy;
4587   PetscBool           free_used_adj;
4588   PetscErrorCode      ierr;
4589 
4590   PetscFunctionBegin;
4591   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4592   free_used_adj = PETSC_FALSE;
4593   if (pcbddc->sub_schurs_layers == -1) {
4594     used_xadj = NULL;
4595     used_adjncy = NULL;
4596   } else {
4597     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4598       used_xadj = pcbddc->mat_graph->xadj;
4599       used_adjncy = pcbddc->mat_graph->adjncy;
4600     } else if (pcbddc->computed_rowadj) {
4601       used_xadj = pcbddc->mat_graph->xadj;
4602       used_adjncy = pcbddc->mat_graph->adjncy;
4603     } else {
4604       PetscBool      flg_row=PETSC_FALSE;
4605       const PetscInt *xadj,*adjncy;
4606       PetscInt       nvtxs;
4607 
4608       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4609       if (flg_row) {
4610         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4611         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4612         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4613         free_used_adj = PETSC_TRUE;
4614       } else {
4615         pcbddc->sub_schurs_layers = -1;
4616         used_xadj = NULL;
4617         used_adjncy = NULL;
4618       }
4619       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4620     }
4621   }
4622 
4623   /* setup sub_schurs data */
4624   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4625   if (!sub_schurs->use_mumps) {
4626     /* pcbddc->ksp_D up to date only if not using MUMPS */
4627     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4628     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr);
4629   } else {
4630     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
4631     PetscBool isseqaij;
4632     if (!pcbddc->use_vertices && reuse_solvers) {
4633       PetscInt n_vertices;
4634 
4635       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
4636       reuse_solvers = (PetscBool)!n_vertices;
4637     }
4638     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
4639     if (!isseqaij) {
4640       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
4641       if (matis->A == pcbddc->local_mat) {
4642         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
4643         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
4644       } else {
4645         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
4646       }
4647     }
4648     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr);
4649   }
4650   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4651 
4652   /* free adjacency */
4653   if (free_used_adj) {
4654     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4655   }
4656   PetscFunctionReturn(0);
4657 }
4658 
4659 #undef __FUNCT__
4660 #define __FUNCT__ "PCBDDCInitSubSchurs"
4661 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4662 {
4663   PC_IS               *pcis=(PC_IS*)pc->data;
4664   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4665   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4666   PCBDDCGraph         graph;
4667   PetscErrorCode      ierr;
4668 
4669   PetscFunctionBegin;
4670   /* attach interface graph for determining subsets */
4671   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4672     IS       verticesIS,verticescomm;
4673     PetscInt vsize,*idxs;
4674 
4675     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4676     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
4677     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
4678     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
4679     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
4680     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4681     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4682     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4683     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
4684     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
4685     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4686 /*
4687     if (pcbddc->dbg_flag) {
4688       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4689     }
4690 */
4691   } else {
4692     graph = pcbddc->mat_graph;
4693   }
4694 
4695   /* sub_schurs init */
4696   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4697 
4698   /* free graph struct */
4699   if (pcbddc->sub_schurs_rebuild) {
4700     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4701   }
4702   PetscFunctionReturn(0);
4703 }
4704