xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision a7fac7c2b47dbcd8ece0cc2dfdfe0e63be1bb7b5)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscReal       *eigs,thresh;
20   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21   PetscBool       allocated_S_St;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     PetscInt subset_size;
51 
52     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53     mss = PetscMax(mss,subset_size);
54   }
55 
56   /* min/max and threshold */
57   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
58   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
59   nmax = PetscMax(nmin,nmax);
60   allocated_S_St = PETSC_FALSE;
61   if (nmin) {
62     allocated_S_St = PETSC_TRUE;
63   }
64 
65   /* allocate lapack workspace */
66   cum = cum2 = 0;
67   maxneigs = 0;
68   for (i=0;i<sub_schurs->n_subs;i++) {
69     PetscInt n,subset_size;
70 
71     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
72     n = PetscMin(subset_size,nmax);
73     cum += subset_size;
74     cum2 += subset_size*n;
75     maxneigs = PetscMax(maxneigs,n);
76   }
77   if (mss) {
78     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
79       PetscBLASInt B_itype = 1;
80       PetscBLASInt B_N = mss;
81       PetscReal    zero = 0.0;
82       PetscReal    eps = 0.0; /* dlamch? */
83 
84       B_lwork = -1;
85       S = NULL;
86       St = NULL;
87       eigs = NULL;
88       eigv = NULL;
89       B_iwork = NULL;
90       B_ifail = NULL;
91 #if defined(PETSC_USE_COMPLEX)
92       rwork = NULL;
93 #endif
94       thresh = 1.0;
95       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
96 #if defined(PETSC_USE_COMPLEX)
97       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98 #else
99       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100 #endif
101       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102       ierr = PetscFPTrapPop();CHKERRQ(ierr);
103     } else {
104         /* TODO */
105     }
106   } else {
107     lwork = 0;
108   }
109 
110   nv = 0;
111   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
112     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
113   }
114   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
115   if (allocated_S_St) {
116     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
117   }
118   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
119 #if defined(PETSC_USE_COMPLEX)
120   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
121 #endif
122   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
123                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
124                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
125                       nv+cum,&pcbddc->adaptive_constraints_idxs,
126                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
127   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
128 
129   maxneigs = 0;
130   cum = cum2 = cumarray = 0;
131   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
132   pcbddc->adaptive_constraints_data_ptr[0] = 0;
133   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
134     const PetscInt *idxs;
135 
136     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
137     for (cum=0;cum<nv;cum++) {
138       pcbddc->adaptive_constraints_n[cum] = 1;
139       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
140       pcbddc->adaptive_constraints_data[cum] = 1.0;
141       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
142       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
143     }
144     cum2 = cum;
145     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
146   }
147 
148   if (mss) { /* multilevel */
149     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
150     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
151   }
152 
153   for (i=0;i<sub_schurs->n_subs;i++) {
154 
155     const PetscInt *idxs;
156     PetscReal      infty = PETSC_MAX_REAL;
157     PetscInt       j,subset_size,eigs_start = 0;
158     PetscBLASInt   B_N;
159     PetscBool      same_data = PETSC_FALSE;
160 
161     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
162     ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
163     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
164       if (sub_schurs->is_hermitian) {
165         PetscInt j,k;
166         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
167           ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
168           ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
169         }
170         for (j=0;j<subset_size;j++) {
171           for (k=j;k<subset_size;k++) {
172             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
173             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
174           }
175         }
176       } else {
177         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
178         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
179       }
180     } else {
181       S = Sarray + cumarray;
182       St = Starray + cumarray;
183     }
184 
185     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
186     /* see if we can save some work */
187     if (sub_schurs->n_subs == 1) {
188       ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
189     }
190 
191     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
192       B_neigs = 0;
193     } else {
194       /* Threshold: this is an heuristic for edges */
195       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
196 
197       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
198         PetscBLASInt B_itype = 1;
199         PetscBLASInt B_IL, B_IU;
200         PetscReal    eps = -1.0; /* dlamch? */
201         PetscInt     nmin_s;
202 
203         /* ask for eigenvalues larger than thresh */
204         if (pcbddc->dbg_flag) {
205           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
206         }
207         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
208 #if defined(PETSC_USE_COMPLEX)
209         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
210 #else
211         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
212 #endif
213         ierr = PetscFPTrapPop();CHKERRQ(ierr);
214         if (B_ierr) {
215           if (B_ierr < 0 ) {
216             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
217           } else if (B_ierr <= B_N) {
218             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
219           } else {
220             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
221           }
222         }
223 
224         if (B_neigs > nmax) {
225           if (pcbddc->dbg_flag) {
226             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
227           }
228           eigs_start = B_neigs -nmax;
229           B_neigs = nmax;
230         }
231 
232         nmin_s = PetscMin(nmin,B_N);
233         if (B_neigs < nmin_s) {
234           PetscBLASInt B_neigs2;
235 
236           B_IU = B_N - B_neigs;
237           B_IL = B_N - nmin_s + 1;
238           if (pcbddc->dbg_flag) {
239             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
240           }
241           if (sub_schurs->is_hermitian) {
242             PetscInt j;
243             for (j=0;j<subset_size;j++) {
244               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
245             }
246             for (j=0;j<subset_size;j++) {
247               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
248             }
249           } else {
250             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
251             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
252           }
253           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
254 #if defined(PETSC_USE_COMPLEX)
255           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
256 #else
257           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
258 #endif
259           ierr = PetscFPTrapPop();CHKERRQ(ierr);
260           B_neigs += B_neigs2;
261         }
262         if (B_ierr) {
263           if (B_ierr < 0 ) {
264             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
265           } else if (B_ierr <= B_N) {
266             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
267           } else {
268             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
269           }
270         }
271         if (pcbddc->dbg_flag) {
272           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);CHKERRQ(ierr);
273           for (j=0;j<B_neigs;j++) {
274             if (eigs[j] == 0.0) {
275               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");CHKERRQ(ierr);
276             } else {
277               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr);
278             }
279           }
280         }
281       } else {
282           /* TODO */
283       }
284     }
285     maxneigs = PetscMax(B_neigs,maxneigs);
286     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
287     if (B_neigs) {
288       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
289 
290       if (pcbddc->dbg_flag > 1) {
291         PetscInt ii;
292         for (ii=0;ii<B_neigs;ii++) {
293           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr);
294           for (j=0;j<B_N;j++) {
295 #if defined(PETSC_USE_COMPLEX)
296             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
297             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
298             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr);
299 #else
300             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr);
301 #endif
302           }
303         }
304       }
305 #if 0
306       for (j=0;j<B_neigs;j++) {
307         PetscBLASInt Blas_N,Blas_one = 1.0;
308         PetscScalar norm;
309         ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
310         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
311                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
312         if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
313           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
314         } else {
315           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
316         }
317         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
318       }
319 #endif
320       ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
321       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
322       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
323       cum++;
324     }
325     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
326     /* shift for next computation */
327     cumarray += subset_size*subset_size;
328   }
329   if (pcbddc->dbg_flag) {
330     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
331   }
332 
333   if (mss) {
334     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
335     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
336     /* destroy matrices (junk) */
337     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
338     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
339   }
340   if (allocated_S_St) {
341     ierr = PetscFree2(S,St);CHKERRQ(ierr);
342   }
343   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
344 #if defined(PETSC_USE_COMPLEX)
345   ierr = PetscFree(rwork);CHKERRQ(ierr);
346 #endif
347   if (pcbddc->dbg_flag) {
348     PetscInt maxneigs_r;
349     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
350     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
351   }
352   PetscFunctionReturn(0);
353 }
354 
355 #undef __FUNCT__
356 #define __FUNCT__ "PCBDDCSetUpSolvers"
357 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
358 {
359   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
360   PetscScalar    *coarse_submat_vals;
361   PetscErrorCode ierr;
362 
363   PetscFunctionBegin;
364   /* Setup local scatters R_to_B and (optionally) R_to_D */
365   /* PCBDDCSetUpLocalWorkVectors should be called first! */
366   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
367 
368   /* Setup local neumann solver ksp_R */
369   /* PCBDDCSetUpLocalScatters should be called first! */
370   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
371 
372   /* Change global null space passed in by the user if change of basis has been requested */
373   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
374     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
375   }
376 
377   /*
378      Setup local correction and local part of coarse basis.
379      Gives back the dense local part of the coarse matrix in column major ordering
380   */
381   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
382 
383   /* Compute total number of coarse nodes and setup coarse solver */
384   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
385 
386   /* free */
387   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
388   PetscFunctionReturn(0);
389 }
390 
391 #undef __FUNCT__
392 #define __FUNCT__ "PCBDDCResetCustomization"
393 PetscErrorCode PCBDDCResetCustomization(PC pc)
394 {
395   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
396   PetscErrorCode ierr;
397 
398   PetscFunctionBegin;
399   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
400   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
401   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
402   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
403   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
404   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
405   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
406   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
407   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
408   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
409   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
410   PetscFunctionReturn(0);
411 }
412 
413 #undef __FUNCT__
414 #define __FUNCT__ "PCBDDCResetTopography"
415 PetscErrorCode PCBDDCResetTopography(PC pc)
416 {
417   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
418   PetscErrorCode ierr;
419 
420   PetscFunctionBegin;
421   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
422   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
423   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
424   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
425   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "PCBDDCResetSolvers"
431 PetscErrorCode PCBDDCResetSolvers(PC pc)
432 {
433   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
434   PetscScalar    *array;
435   PetscErrorCode ierr;
436 
437   PetscFunctionBegin;
438   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
439   if (pcbddc->coarse_phi_B) {
440     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
441     ierr = PetscFree(array);CHKERRQ(ierr);
442   }
443   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
444   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
445   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
446   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
447   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
448   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
449   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
450   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
451   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
452   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
453   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
454   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
455   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
456   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
457   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
458   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
459   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
460   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
461   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
462   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
463   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
464   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
465   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
466   PetscFunctionReturn(0);
467 }
468 
469 #undef __FUNCT__
470 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
471 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
472 {
473   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
474   PC_IS          *pcis = (PC_IS*)pc->data;
475   VecType        impVecType;
476   PetscInt       n_constraints,n_R,old_size;
477   PetscErrorCode ierr;
478 
479   PetscFunctionBegin;
480   if (!pcbddc->ConstraintMatrix) {
481     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
482   }
483   /* get sizes */
484   n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices;
485   n_R = pcis->n-pcbddc->n_vertices;
486   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
487   /* local work vectors (try to avoid unneeded work)*/
488   /* R nodes */
489   old_size = -1;
490   if (pcbddc->vec1_R) {
491     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
492   }
493   if (n_R != old_size) {
494     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
495     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
496     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
497     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
498     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
499     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
500   }
501   /* local primal dofs */
502   old_size = -1;
503   if (pcbddc->vec1_P) {
504     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
505   }
506   if (pcbddc->local_primal_size != old_size) {
507     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
508     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
509     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
510     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
511   }
512   /* local explicit constraints */
513   old_size = -1;
514   if (pcbddc->vec1_C) {
515     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
516   }
517   if (n_constraints && n_constraints != old_size) {
518     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
519     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
520     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
521     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
522   }
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "PCBDDCSetUpCorrection"
528 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
529 {
530   PetscErrorCode  ierr;
531   /* pointers to pcis and pcbddc */
532   PC_IS*          pcis = (PC_IS*)pc->data;
533   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
534   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
535   /* submatrices of local problem */
536   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
537   /* submatrices of local coarse problem */
538   Mat             S_VV,S_CV,S_VC,S_CC;
539   /* working matrices */
540   Mat             C_CR;
541   /* additional working stuff */
542   PC              pc_R;
543   Mat             F;
544   PetscBool       isLU,isCHOL,isILU;
545 
546   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
547   PetscScalar     *work;
548   PetscInt        *idx_V_B;
549   PetscInt        n,n_vertices,n_constraints;
550   PetscInt        i,n_R,n_D,n_B;
551   PetscBool       unsymmetric_check;
552   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
553   MatType         impMatType;
554   /* some shortcuts to scalars */
555   PetscScalar     one=1.0,m_one=-1.0;
556 
557   PetscFunctionBegin;
558   n_vertices = pcbddc->n_vertices;
559   n_constraints = pcbddc->local_primal_size-n_vertices;
560   /* Set Non-overlapping dimensions */
561   n_B = pcis->n_B;
562   n_D = pcis->n - n_B;
563   n_R = pcis->n - n_vertices;
564 
565   /* Set types for local objects needed by BDDC precondtioner */
566   impMatType = MATSEQDENSE;
567 
568   /* vertices in boundary numbering */
569   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
570   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
571   if (i != n_vertices) {
572     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
573   }
574 
575   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
576   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
577   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
578   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
579   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
580   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
581   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
582   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
583   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
584   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
585 
586   unsymmetric_check = PETSC_FALSE;
587   /* allocate workspace */
588   n = 0;
589   if (n_constraints) {
590     n += n_R*n_constraints;
591   }
592   if (n_vertices) {
593     n = PetscMax(2*n_R*n_vertices,n);
594     n = PetscMax((n_R+n_B)*n_vertices,n);
595   }
596   if (!pcbddc->symmetric_primal) {
597     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
598     unsymmetric_check = PETSC_TRUE;
599   }
600   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
601 
602   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
603   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
604   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
605   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
606   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
607   if (isLU || isILU || isCHOL) {
608     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
609   } else if (sub_schurs->reuse_mumps) {
610     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
611     MatFactorType type;
612 
613     F = reuse_mumps->F;
614     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
615     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
616   } else {
617     F = NULL;
618   }
619 
620   /* Precompute stuffs needed for preprocessing and application of BDDC*/
621   if (n_constraints) {
622     Mat         M1,M2,M3;
623     Mat         auxmat;
624     IS          is_aux;
625     PetscScalar *array,*array2;
626 
627     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
628     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
629 
630     /* Extract constraints on R nodes: C_{CR}  */
631     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
632     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
633     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
634 
635     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
636     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
637     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
638     for (i=0;i<n_constraints;i++) {
639       const PetscScalar *row_cmat_values;
640       const PetscInt    *row_cmat_indices;
641       PetscInt          size_of_constraint,j;
642 
643       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
644       for (j=0;j<size_of_constraint;j++) {
645         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
646       }
647       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
648     }
649     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
650     if (F) {
651       Mat B;
652 
653       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
654       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
655       ierr = MatDestroy(&B);CHKERRQ(ierr);
656     } else {
657       PetscScalar *marr;
658 
659       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
660       for (i=0;i<n_constraints;i++) {
661         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
662         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
663         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
664         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
665         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
666       }
667       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
668     }
669     if (!pcbddc->switch_static) {
670       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
671       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
672       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
673       for (i=0;i<n_constraints;i++) {
674         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
675         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
676         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
677         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
678         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
679         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
680       }
681       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
682       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
683       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
684     } else {
685       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
686       pcbddc->local_auxmat2 = local_auxmat2_R;
687       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
688     }
689     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
690     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
691     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
692     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
693     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
694     if (isCHOL) {
695       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
696     } else {
697       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
698     }
699     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
700     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
701     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
702     ierr = MatDestroy(&M2);CHKERRQ(ierr);
703     ierr = MatDestroy(&M3);CHKERRQ(ierr);
704     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
705     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
706     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
707     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
708     ierr = MatDestroy(&M1);CHKERRQ(ierr);
709   }
710   /* Get submatrices from subdomain matrix */
711   if (n_vertices) {
712     IS is_aux;
713 
714     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
715       IS tis;
716 
717       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
718       ierr = ISSort(tis);CHKERRQ(ierr);
719       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
720       ierr = ISDestroy(&tis);CHKERRQ(ierr);
721     } else {
722       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
723     }
724     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
725     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
726     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
727     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
728   }
729 
730   /* Matrix of coarse basis functions (local) */
731   if (pcbddc->coarse_phi_B) {
732     PetscInt on_B,on_primal,on_D=n_D;
733     if (pcbddc->coarse_phi_D) {
734       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
735     }
736     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
737     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
738       PetscScalar *marray;
739 
740       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
741       ierr = PetscFree(marray);CHKERRQ(ierr);
742       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
743       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
744       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
745       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
746     }
747   }
748 
749   if (!pcbddc->coarse_phi_B) {
750     PetscScalar *marray;
751 
752     n = n_B*pcbddc->local_primal_size;
753     if (pcbddc->switch_static || pcbddc->dbg_flag) {
754       n += n_D*pcbddc->local_primal_size;
755     }
756     if (!pcbddc->symmetric_primal) {
757       n *= 2;
758     }
759     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
760     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
761     n = n_B*pcbddc->local_primal_size;
762     if (pcbddc->switch_static || pcbddc->dbg_flag) {
763       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
764       n += n_D*pcbddc->local_primal_size;
765     }
766     if (!pcbddc->symmetric_primal) {
767       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
768       if (pcbddc->switch_static || pcbddc->dbg_flag) {
769         n = n_B*pcbddc->local_primal_size;
770         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
771       }
772     } else {
773       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
774       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
775       if (pcbddc->switch_static || pcbddc->dbg_flag) {
776         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
777         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
778       }
779     }
780   }
781   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
782   /* vertices */
783   if (n_vertices) {
784 
785     ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
786 
787     if (n_R) {
788       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
789       PetscBLASInt B_N,B_one = 1;
790       PetscScalar  *x,*y;
791       PetscBool    isseqaij;
792 
793       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
794       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
795       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
796       if (F) { /* TODO could be optimized for symmetric problems */
797         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
798       } else {
799         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
800         for (i=0;i<n_vertices;i++) {
801           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
802           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
803           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
804           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
805           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
806         }
807         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
808       }
809       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
810       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
811       if (n_constraints) {
812         Mat B;
813 
814         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
815         for (i=0;i<n_vertices;i++) {
816           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
817           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
818           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
819           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
820           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
821           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
822         }
823         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
824         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
825         ierr = MatDestroy(&B);CHKERRQ(ierr);
826         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
827         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
828         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
829         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
830         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
831         ierr = MatDestroy(&B);CHKERRQ(ierr);
832       }
833       ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
834       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
835         ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
836       }
837       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
838       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
839       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
840       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
841       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
842       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
843       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
844       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
845       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
846       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
847     } else {
848       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
849     }
850     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
851     /* coarse basis functions */
852     for (i=0;i<n_vertices;i++) {
853       PetscScalar *y;
854 
855       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
856       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
857       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
858       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
859       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
860       y[n_B*i+idx_V_B[i]] = 1.0;
861       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
862       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
863 
864       if (pcbddc->switch_static || pcbddc->dbg_flag) {
865         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
866         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
867         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
870         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
871       }
872       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
873     }
874     /* if n_R == 0 the object is not destroyed */
875     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
876   }
877 
878   if (n_constraints) {
879     Mat B;
880 
881     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
882     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
883     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
884     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
885     if (n_vertices) {
886       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
887         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
888       } else {
889         Mat S_VCt;
890 
891         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
892         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
893         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
894       }
895     }
896     ierr = MatDestroy(&B);CHKERRQ(ierr);
897     /* coarse basis functions */
898     for (i=0;i<n_constraints;i++) {
899       PetscScalar *y;
900 
901       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
902       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
903       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
904       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
905       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
906       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
907       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
908       if (pcbddc->switch_static || pcbddc->dbg_flag) {
909         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
910         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
911         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
912         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
913         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
914         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
915       }
916       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
917     }
918   }
919   if (n_constraints) {
920     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
921   }
922 
923   /* compute other basis functions for non-symmetric problems */
924   if (!pcbddc->symmetric_primal) {
925 
926     if (n_constraints) {
927       Mat S_CCT,B_C;
928 
929       /* this is a lazy thing */
930       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
931       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
932       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
933       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
934       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
935       if (n_vertices) {
936         Mat B_V,S_VCT;
937 
938         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
939         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
940         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
941         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
942         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
943       }
944       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
945     } else { /* if there are no constraints, reset work */
946       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
947     }
948     if (n_vertices && n_R) {
949       Mat          A_VRT;
950       PetscScalar  *marray;
951       PetscBLASInt B_N,B_one = 1;
952 
953       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
954       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
955       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
956       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
957       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
958       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
959       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
960     }
961 
962     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
963       for (i=0;i<pcbddc->local_primal_size;i++) {
964         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
965         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
966         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
967         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
968         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
969       }
970     } else {
971       for (i=0;i<pcbddc->local_primal_size;i++) {
972         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
973         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
974         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
975         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
976         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
977       }
978     }
979     /* coarse basis functions */
980     for (i=0;i<pcbddc->local_primal_size;i++) {
981       PetscScalar *y;
982 
983       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
984       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
985       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
986       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
987       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
988       if (i<n_vertices) {
989         y[n_B*i+idx_V_B[i]] = 1.0;
990       }
991       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
992       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
993 
994       if (pcbddc->switch_static || pcbddc->dbg_flag) {
995         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
996         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
997         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
998         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
999         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1000         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1001       }
1002       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1003     }
1004   }
1005   /* free memory */
1006   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1007   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1008   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1009   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1010   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1011   ierr = PetscFree(work);CHKERRQ(ierr);
1012   if (n_vertices) {
1013     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1014   }
1015   if (n_constraints) {
1016     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1017   }
1018   /* Checking coarse_sub_mat and coarse basis functios */
1019   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1020   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1021   if (pcbddc->dbg_flag) {
1022     Mat         coarse_sub_mat;
1023     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1024     Mat         coarse_phi_D,coarse_phi_B;
1025     Mat         coarse_psi_D,coarse_psi_B;
1026     Mat         A_II,A_BB,A_IB,A_BI;
1027     Mat         C_B,CPHI;
1028     IS          is_dummy;
1029     Vec         mones;
1030     MatType     checkmattype=MATSEQAIJ;
1031     PetscReal   real_value;
1032 
1033     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1034     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1035     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1036     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1037     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1038     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1039     if (unsymmetric_check) {
1040       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1041       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1042     }
1043     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1044 
1045     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1046     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1047     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1048     if (unsymmetric_check) {
1049       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1050       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1051       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1052       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1053       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1054       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1055       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1056       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1057       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1058       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1059       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1060       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1061     } else {
1062       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1063       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1064       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1065       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1066       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1067       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1068       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1069       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1070     }
1071     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1072     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1073     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1074     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1075     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1076     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1077     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1078     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1079 
1080     /* check constraints */
1081     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1082     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1083     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1084     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1085     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1086     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1087     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1088     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1089     if (unsymmetric_check) {
1090       ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1091       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1092       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1093       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1094       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1095     }
1096     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1097     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1098     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1099     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1100 
1101     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1102     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1103     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1104     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1105     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1106     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1107     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1108     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1109     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1110     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1111     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1112     if (unsymmetric_check) {
1113       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1114       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1115     }
1116     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1117   }
1118   /* get back data */
1119   *coarse_submat_vals_n = coarse_submat_vals;
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1125 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1126 {
1127   Mat            *work_mat;
1128   IS             isrow_s,iscol_s;
1129   PetscBool      rsorted,csorted;
1130   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1131   PetscErrorCode ierr;
1132 
1133   PetscFunctionBegin;
1134   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1135   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1136   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1137   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1138 
1139   if (!rsorted) {
1140     const PetscInt *idxs;
1141     PetscInt *idxs_sorted,i;
1142 
1143     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1144     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1145     for (i=0;i<rsize;i++) {
1146       idxs_perm_r[i] = i;
1147     }
1148     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1149     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1150     for (i=0;i<rsize;i++) {
1151       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1152     }
1153     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1154     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1155   } else {
1156     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1157     isrow_s = isrow;
1158   }
1159 
1160   if (!csorted) {
1161     if (isrow == iscol) {
1162       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1163       iscol_s = isrow_s;
1164     } else {
1165       const PetscInt *idxs;
1166       PetscInt *idxs_sorted,i;
1167 
1168       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1169       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1170       for (i=0;i<csize;i++) {
1171         idxs_perm_c[i] = i;
1172       }
1173       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1174       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1175       for (i=0;i<csize;i++) {
1176         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1177       }
1178       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1179       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1180     }
1181   } else {
1182     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1183     iscol_s = iscol;
1184   }
1185 
1186   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1187 
1188   if (!rsorted || !csorted) {
1189     Mat      new_mat;
1190     IS       is_perm_r,is_perm_c;
1191 
1192     if (!rsorted) {
1193       PetscInt *idxs_r,i;
1194       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1195       for (i=0;i<rsize;i++) {
1196         idxs_r[idxs_perm_r[i]] = i;
1197       }
1198       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1199       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1200     } else {
1201       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1202     }
1203     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1204 
1205     if (!csorted) {
1206       if (isrow_s == iscol_s) {
1207         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1208         is_perm_c = is_perm_r;
1209       } else {
1210         PetscInt *idxs_c,i;
1211         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1212         for (i=0;i<csize;i++) {
1213           idxs_c[idxs_perm_c[i]] = i;
1214         }
1215         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1216         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1217       }
1218     } else {
1219       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1220     }
1221     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1222 
1223     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1224     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1225     work_mat[0] = new_mat;
1226     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1227     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1228   }
1229 
1230   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1231   *B = work_mat[0];
1232   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1233   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1234   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 #undef __FUNCT__
1239 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1240 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1241 {
1242   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1243   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1244   Mat            new_mat;
1245   IS             is_local,is_global;
1246   PetscInt       local_size;
1247   PetscBool      isseqaij;
1248   PetscErrorCode ierr;
1249 
1250   PetscFunctionBegin;
1251   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1252   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1253   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1254   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1255   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1256   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1257   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1258 
1259   /* check */
1260   if (pcbddc->dbg_flag) {
1261     Vec       x,x_change;
1262     PetscReal error;
1263 
1264     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1265     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1266     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1267     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1269     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1270     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1271     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1272     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1273     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1274     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1275     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1276     ierr = VecDestroy(&x);CHKERRQ(ierr);
1277     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1278   }
1279 
1280   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1281   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1282   if (isseqaij) {
1283     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1284   } else {
1285     Mat work_mat;
1286     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1287     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1288     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1289   }
1290   if (matis->A->symmetric_set) {
1291     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
1292 #if !defined(PETSC_USE_COMPLEX)
1293     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
1294 #endif
1295   }
1296   /*
1297   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1298   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1299   */
1300   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1306 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1307 {
1308   PC_IS*          pcis = (PC_IS*)(pc->data);
1309   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1310   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1311   PetscInt        *idx_R_local=NULL;
1312   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1313   PetscInt        vbs,bs;
1314   PetscBT         bitmask=NULL;
1315   PetscErrorCode  ierr;
1316 
1317   PetscFunctionBegin;
1318   /*
1319     No need to setup local scatters if
1320       - primal space is unchanged
1321         AND
1322       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1323         AND
1324       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1325   */
1326   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1327     PetscFunctionReturn(0);
1328   }
1329   /* destroy old objects */
1330   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1331   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1332   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1333   /* Set Non-overlapping dimensions */
1334   n_B = pcis->n_B;
1335   n_D = pcis->n - n_B;
1336   n_vertices = pcbddc->n_vertices;
1337 
1338   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1339 
1340   /* create auxiliary bitmask and allocate workspace */
1341   if (!sub_schurs->reuse_mumps) {
1342     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1343     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1344     for (i=0;i<n_vertices;i++) {
1345       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1346     }
1347 
1348     for (i=0, n_R=0; i<pcis->n; i++) {
1349       if (!PetscBTLookup(bitmask,i)) {
1350         idx_R_local[n_R++] = i;
1351       }
1352     }
1353   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1354     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1355 
1356     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1357     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
1358   }
1359 
1360   /* Block code */
1361   vbs = 1;
1362   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1363   if (bs>1 && !(n_vertices%bs)) {
1364     PetscBool is_blocked = PETSC_TRUE;
1365     PetscInt  *vary;
1366     if (!sub_schurs->reuse_mumps) {
1367       ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1368       ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1369       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1370       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
1371       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1372       for (i=0; i<pcis->n/bs; i++) {
1373         if (vary[i]!=0 && vary[i]!=bs) {
1374           is_blocked = PETSC_FALSE;
1375           break;
1376         }
1377       }
1378       ierr = PetscFree(vary);CHKERRQ(ierr);
1379     } else {
1380       /* Verify directly the R set */
1381       for (i=0; i<n_R/bs; i++) {
1382         PetscInt j,node=idx_R_local[bs*i];
1383         for (j=1; j<bs; j++) {
1384           if (node != idx_R_local[bs*i+j]-j) {
1385             is_blocked = PETSC_FALSE;
1386             break;
1387           }
1388         }
1389       }
1390     }
1391     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1392       vbs = bs;
1393       for (i=0;i<n_R/vbs;i++) {
1394         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1395       }
1396     }
1397   }
1398   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1399   if (sub_schurs->reuse_mumps) {
1400     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1401 
1402     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1403     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
1404     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
1405     reuse_mumps->is_R = pcbddc->is_R_local;
1406   } else {
1407     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1408   }
1409 
1410   /* print some info if requested */
1411   if (pcbddc->dbg_flag) {
1412     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1413     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1414     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1415     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1416     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1417     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1418     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1419   }
1420 
1421   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1422   if (!sub_schurs->reuse_mumps) {
1423     IS       is_aux1,is_aux2;
1424     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1425 
1426     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1427     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1428     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1429     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1430     for (i=0; i<n_D; i++) {
1431       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1432     }
1433     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1434     for (i=0, j=0; i<n_R; i++) {
1435       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1436         aux_array1[j++] = i;
1437       }
1438     }
1439     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1440     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1441     for (i=0, j=0; i<n_B; i++) {
1442       if (!PetscBTLookup(bitmask,is_indices[i])) {
1443         aux_array2[j++] = i;
1444       }
1445     }
1446     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1447     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1448     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1449     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1450     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1451 
1452     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1453       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1454       for (i=0, j=0; i<n_R; i++) {
1455         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1456           aux_array1[j++] = i;
1457         }
1458       }
1459       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1460       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1461       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1462     }
1463     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1464     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1465   } else {
1466     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1467     IS               tis;
1468     PetscInt         schur_size;
1469 
1470     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
1471     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
1472     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
1473     ierr = ISDestroy(&tis);CHKERRQ(ierr);
1474     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1475       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
1476       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1477       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1478     }
1479   }
1480   PetscFunctionReturn(0);
1481 }
1482 
1483 
1484 #undef __FUNCT__
1485 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1486 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1487 {
1488   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1489   PC_IS          *pcis = (PC_IS*)pc->data;
1490   PC             pc_temp;
1491   Mat            A_RR;
1492   MatReuse       reuse;
1493   PetscScalar    m_one = -1.0;
1494   PetscReal      value;
1495   PetscInt       n_D,n_R;
1496   PetscBool      use_exact,use_exact_reduced,issbaij;
1497   PetscErrorCode ierr;
1498   /* prefixes stuff */
1499   char           dir_prefix[256],neu_prefix[256],str_level[16];
1500   size_t         len;
1501 
1502   PetscFunctionBegin;
1503 
1504   /* compute prefixes */
1505   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1506   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1507   if (!pcbddc->current_level) {
1508     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1509     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1510     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1511     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1512   } else {
1513     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1514     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1515     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1516     len -= 15; /* remove "pc_bddc_coarse_" */
1517     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1518     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1519     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1520     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1521     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1522     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1523     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1524     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1525   }
1526 
1527   /* DIRICHLET PROBLEM */
1528   if (dirichlet) {
1529     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1530     if (pcbddc->local_mat->symmetric_set) {
1531       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1532     }
1533     /* Matrix for Dirichlet problem is pcis->A_II */
1534     n_D = pcis->n - pcis->n_B;
1535     if (!pcbddc->ksp_D) { /* create object if not yet build */
1536       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1537       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1538       /* default */
1539       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1540       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1541       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1542       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1543       if (issbaij) {
1544         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1545       } else {
1546         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1547       }
1548       /* Allow user's customization */
1549       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1550       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1551     }
1552     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1553     if (sub_schurs->reuse_mumps) {
1554       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1555 
1556       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
1557     }
1558     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1559     if (!n_D) {
1560       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1561       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1562     }
1563     /* Set Up KSP for Dirichlet problem of BDDC */
1564     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1565     /* set ksp_D into pcis data */
1566     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1567     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1568     pcis->ksp_D = pcbddc->ksp_D;
1569   }
1570 
1571   /* NEUMANN PROBLEM */
1572   A_RR = 0;
1573   if (neumann) {
1574     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1575     PetscInt        ibs,mbs;
1576     PetscBool       issbaij;
1577     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
1578     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1579     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1580     if (pcbddc->ksp_R) { /* already created ksp */
1581       PetscInt nn_R;
1582       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1583       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1584       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1585       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1586         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1587         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1588         reuse = MAT_INITIAL_MATRIX;
1589       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1590         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1591           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1592           reuse = MAT_INITIAL_MATRIX;
1593         } else { /* safe to reuse the matrix */
1594           reuse = MAT_REUSE_MATRIX;
1595         }
1596       }
1597       /* last check */
1598       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1599         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1600         reuse = MAT_INITIAL_MATRIX;
1601       }
1602     } else { /* first time, so we need to create the matrix */
1603       reuse = MAT_INITIAL_MATRIX;
1604     }
1605     /* extract A_RR */
1606     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1607     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1608     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1609     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
1610       if (matis->A == pcbddc->local_mat) {
1611         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1612         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1613       } else {
1614         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1615       }
1616     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
1617       if (matis->A == pcbddc->local_mat) {
1618         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1619         ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1620       } else {
1621         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1622       }
1623     }
1624     if (!sub_schurs->reuse_mumps) {
1625       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1626       if (pcbddc->local_mat->symmetric_set) {
1627         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1628       }
1629     } else {
1630       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1631 
1632       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1633       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
1634       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1635     }
1636     if (!pcbddc->ksp_R) { /* create object if not present */
1637       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1638       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1639       /* default */
1640       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1641       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1642       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1643       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1644       if (issbaij) {
1645         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1646       } else {
1647         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1648       }
1649       /* Allow user's customization */
1650       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1651       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1652     }
1653     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1654     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1655     if (!n_R) {
1656       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1657       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1658     }
1659     /* Reuse MUMPS solver if it is present */
1660     if (sub_schurs->reuse_mumps) {
1661       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1662 
1663       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
1664     }
1665     /* Set Up KSP for Neumann problem of BDDC */
1666     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1667   }
1668   /* free Neumann problem's matrix */
1669   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1670 
1671   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1672   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1673     if (pcbddc->dbg_flag) {
1674       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1675       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1676       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1677     }
1678     if (dirichlet) { /* Dirichlet */
1679       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1680       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1681       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1682       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1683       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1684       /* need to be adapted? */
1685       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1686       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1687       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1688       /* print info */
1689       if (pcbddc->dbg_flag) {
1690         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1691         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1692       }
1693       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1694         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr);
1695       }
1696     }
1697     if (neumann) { /* Neumann */
1698       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
1699       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1700       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1701       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1702       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1703       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1704       /* need to be adapted? */
1705       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1706       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1707       /* print info */
1708       if (pcbddc->dbg_flag) {
1709         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1710         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1711       }
1712       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1713         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr);
1714       }
1715     }
1716   }
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1722 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1723 {
1724   PetscErrorCode  ierr;
1725   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1726   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1727 
1728   PetscFunctionBegin;
1729   if (!sub_schurs->reuse_mumps) {
1730     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
1731   }
1732   if (!pcbddc->switch_static) {
1733     if (applytranspose && pcbddc->local_auxmat1) {
1734       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1735       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1736     }
1737     if (!sub_schurs->reuse_mumps) {
1738       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1739       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1740     } else {
1741       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1742 
1743       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1744       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1745     }
1746   } else {
1747     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1748     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1749     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1750     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1751     if (applytranspose && pcbddc->local_auxmat1) {
1752       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
1753       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1754       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1755       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1756     }
1757   }
1758   if (!sub_schurs->reuse_mumps) {
1759     if (applytranspose) {
1760       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1761     } else {
1762       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1763     }
1764 #if defined(PETSC_HAVE_MUMPS)
1765   } else {
1766     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1767 
1768     if (applytranspose) {
1769       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1770     } else {
1771       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1772     }
1773 #endif
1774   }
1775   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
1776   if (!pcbddc->switch_static) {
1777     if (!sub_schurs->reuse_mumps) {
1778       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1779       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1780     } else {
1781       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1782 
1783       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1784       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1785     }
1786     if (!applytranspose && pcbddc->local_auxmat1) {
1787       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1788       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1789     }
1790   } else {
1791     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1792     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1793     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1794     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1795     if (!applytranspose && pcbddc->local_auxmat1) {
1796       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1797       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1798     }
1799     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1800     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1801     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1802     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1803   }
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1808 #undef __FUNCT__
1809 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1810 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1811 {
1812   PetscErrorCode ierr;
1813   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1814   PC_IS*            pcis = (PC_IS*)  (pc->data);
1815   const PetscScalar zero = 0.0;
1816 
1817   PetscFunctionBegin;
1818   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1819   if (applytranspose) {
1820     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1821     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1822   } else {
1823     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1824     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1825   }
1826   /* start communications from local primal nodes to rhs of coarse solver */
1827   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1828   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1829   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1830 
1831   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1832   /* TODO remove null space when doing multilevel */
1833   if (pcbddc->coarse_ksp) {
1834     Vec rhs,sol;
1835 
1836     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1837     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1838     if (applytranspose) {
1839       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1840     } else {
1841       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1842     }
1843   }
1844 
1845   /* Local solution on R nodes */
1846   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
1847     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
1848   }
1849 
1850   /* communications from coarse sol to local primal nodes */
1851   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1852   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1853 
1854   /* Sum contributions from two levels */
1855   if (applytranspose) {
1856     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1857     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1858   } else {
1859     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1860     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1861   }
1862   PetscFunctionReturn(0);
1863 }
1864 
1865 #undef __FUNCT__
1866 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1867 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1868 {
1869   PetscErrorCode ierr;
1870   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1871   PetscScalar    *array;
1872   Vec            from,to;
1873 
1874   PetscFunctionBegin;
1875   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1876     from = pcbddc->coarse_vec;
1877     to = pcbddc->vec1_P;
1878     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1879       Vec tvec;
1880 
1881       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1882       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1883       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1884       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1885       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1886       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1887     }
1888   } else { /* from local to global -> put data in coarse right hand side */
1889     from = pcbddc->vec1_P;
1890     to = pcbddc->coarse_vec;
1891   }
1892   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1898 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1899 {
1900   PetscErrorCode ierr;
1901   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1902   PetscScalar    *array;
1903   Vec            from,to;
1904 
1905   PetscFunctionBegin;
1906   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1907     from = pcbddc->coarse_vec;
1908     to = pcbddc->vec1_P;
1909   } else { /* from local to global -> put data in coarse right hand side */
1910     from = pcbddc->vec1_P;
1911     to = pcbddc->coarse_vec;
1912   }
1913   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1914   if (smode == SCATTER_FORWARD) {
1915     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1916       Vec tvec;
1917 
1918       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1919       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1920       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1921       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1922     }
1923   } else {
1924     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1925      ierr = VecResetArray(from);CHKERRQ(ierr);
1926     }
1927   }
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 /* uncomment for testing purposes */
1932 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1933 #undef __FUNCT__
1934 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1935 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1936 {
1937   PetscErrorCode    ierr;
1938   PC_IS*            pcis = (PC_IS*)(pc->data);
1939   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1940   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1941   /* one and zero */
1942   PetscScalar       one=1.0,zero=0.0;
1943   /* space to store constraints and their local indices */
1944   PetscScalar       *constraints_data;
1945   PetscInt          *constraints_idxs,*constraints_idxs_B;
1946   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
1947   PetscInt          *constraints_n;
1948   /* iterators */
1949   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
1950   /* BLAS integers */
1951   PetscBLASInt      lwork,lierr;
1952   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1953   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1954   /* reuse */
1955   PetscInt          olocal_primal_size,olocal_primal_size_cc;
1956   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
1957   /* change of basis */
1958   PetscBool         qr_needed;
1959   PetscBT           change_basis,qr_needed_idx;
1960   /* auxiliary stuff */
1961   PetscInt          *nnz,*is_indices;
1962   PetscInt          ncc;
1963   /* some quantities */
1964   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1965   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1966 
1967   PetscFunctionBegin;
1968   /* Destroy Mat objects computed previously */
1969   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1970   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1971   /* save info on constraints from previous setup (if any) */
1972   olocal_primal_size = pcbddc->local_primal_size;
1973   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
1974   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
1975   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1976   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1977   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
1978   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1979 
1980   /* print some info */
1981   if (pcbddc->dbg_flag) {
1982     IS       vertices;
1983     PetscInt nv,nedges,nfaces;
1984     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1985     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1986     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1987     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1988     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1989     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1990     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1991     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1992     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1993   }
1994 
1995   if (!pcbddc->adaptive_selection) {
1996     IS           ISForVertices,*ISForFaces,*ISForEdges;
1997     MatNullSpace nearnullsp;
1998     const Vec    *nearnullvecs;
1999     Vec          *localnearnullsp;
2000     PetscScalar  *array;
2001     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
2002     PetscBool    nnsp_has_cnst;
2003     /* LAPACK working arrays for SVD or POD */
2004     PetscBool    skip_lapack,boolforchange;
2005     PetscScalar  *work;
2006     PetscReal    *singular_vals;
2007 #if defined(PETSC_USE_COMPLEX)
2008     PetscReal    *rwork;
2009 #endif
2010 #if defined(PETSC_MISSING_LAPACK_GESVD)
2011     PetscScalar  *temp_basis,*correlation_mat;
2012 #else
2013     PetscBLASInt dummy_int=1;
2014     PetscScalar  dummy_scalar=1.;
2015 #endif
2016 
2017     /* Get index sets for faces, edges and vertices from graph */
2018     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2019     /* free unneeded index sets */
2020     if (!pcbddc->use_vertices) {
2021       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2022     }
2023     if (!pcbddc->use_edges) {
2024       for (i=0;i<n_ISForEdges;i++) {
2025         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2026       }
2027       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2028       n_ISForEdges = 0;
2029     }
2030     if (!pcbddc->use_faces) {
2031       for (i=0;i<n_ISForFaces;i++) {
2032         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2033       }
2034       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2035       n_ISForFaces = 0;
2036     }
2037 
2038 #if defined(PETSC_USE_DEBUG)
2039     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2040        Also use_change_of_basis should be consistent among processors */
2041     if (pcbddc->NullSpace) {
2042       PetscBool tbool[2],gbool[2];
2043 
2044       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2045         pcbddc->use_change_of_basis = PETSC_TRUE;
2046         if (!ISForEdges) {
2047           pcbddc->use_change_on_faces = PETSC_TRUE;
2048         }
2049       }
2050       tbool[0] = pcbddc->use_change_of_basis;
2051       tbool[1] = pcbddc->use_change_on_faces;
2052       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2053       pcbddc->use_change_of_basis = gbool[0];
2054       pcbddc->use_change_on_faces = gbool[1];
2055     }
2056 #endif
2057 
2058     /* check if near null space is attached to global mat */
2059     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2060     if (nearnullsp) {
2061       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2062       /* remove any stored info */
2063       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2064       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2065       /* store information for BDDC solver reuse */
2066       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2067       pcbddc->onearnullspace = nearnullsp;
2068       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2069       for (i=0;i<nnsp_size;i++) {
2070         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2071       }
2072     } else { /* if near null space is not provided BDDC uses constants by default */
2073       nnsp_size = 0;
2074       nnsp_has_cnst = PETSC_TRUE;
2075     }
2076     /* get max number of constraints on a single cc */
2077     max_constraints = nnsp_size;
2078     if (nnsp_has_cnst) max_constraints++;
2079 
2080     /*
2081          Evaluate maximum storage size needed by the procedure
2082          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2083          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2084          There can be multiple constraints per connected component
2085                                                                                                                                                            */
2086     n_vertices = 0;
2087     if (ISForVertices) {
2088       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2089     }
2090     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2091     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2092 
2093     total_counts = n_ISForFaces+n_ISForEdges;
2094     total_counts *= max_constraints;
2095     total_counts += n_vertices;
2096     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2097 
2098     total_counts = 0;
2099     max_size_of_constraint = 0;
2100     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2101       IS used_is;
2102       if (i<n_ISForEdges) {
2103         used_is = ISForEdges[i];
2104       } else {
2105         used_is = ISForFaces[i-n_ISForEdges];
2106       }
2107       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2108       total_counts += j;
2109       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2110     }
2111     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2112 
2113     /* get local part of global near null space vectors */
2114     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2115     for (k=0;k<nnsp_size;k++) {
2116       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2117       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2118       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2119     }
2120 
2121     /* whether or not to skip lapack calls */
2122     skip_lapack = PETSC_TRUE;
2123     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2124 
2125     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2126     if (!skip_lapack) {
2127       PetscScalar temp_work;
2128 
2129 #if defined(PETSC_MISSING_LAPACK_GESVD)
2130       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2131       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2132       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2133       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2134 #if defined(PETSC_USE_COMPLEX)
2135       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2136 #endif
2137       /* now we evaluate the optimal workspace using query with lwork=-1 */
2138       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2139       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2140       lwork = -1;
2141       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2142 #if !defined(PETSC_USE_COMPLEX)
2143       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2144 #else
2145       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2146 #endif
2147       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2148       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2149 #else /* on missing GESVD */
2150       /* SVD */
2151       PetscInt max_n,min_n;
2152       max_n = max_size_of_constraint;
2153       min_n = max_constraints;
2154       if (max_size_of_constraint < max_constraints) {
2155         min_n = max_size_of_constraint;
2156         max_n = max_constraints;
2157       }
2158       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2159 #if defined(PETSC_USE_COMPLEX)
2160       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2161 #endif
2162       /* now we evaluate the optimal workspace using query with lwork=-1 */
2163       lwork = -1;
2164       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2165       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2166       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2167       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2168 #if !defined(PETSC_USE_COMPLEX)
2169       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2170 #else
2171       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2172 #endif
2173       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2174       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2175 #endif /* on missing GESVD */
2176       /* Allocate optimal workspace */
2177       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2178       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2179     }
2180     /* Now we can loop on constraining sets */
2181     total_counts = 0;
2182     constraints_idxs_ptr[0] = 0;
2183     constraints_data_ptr[0] = 0;
2184     /* vertices */
2185     if (n_vertices) {
2186       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2187       if (nnsp_has_cnst) { /* it considers all possible vertices */
2188         ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2189         for (i=0;i<n_vertices;i++) {
2190           constraints_n[total_counts] = 1;
2191           constraints_data[total_counts] = 1.0;
2192           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2193           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2194           total_counts++;
2195         }
2196       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2197         PetscBool used_vertex;
2198         for (i=0;i<n_vertices;i++) {
2199           used_vertex = PETSC_FALSE;
2200           k = 0;
2201           while (!used_vertex && k<nnsp_size) {
2202             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2203             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2204               constraints_n[total_counts] = 1;
2205               constraints_idxs[total_counts] = is_indices[i];
2206               constraints_data[total_counts] = 1.0;
2207               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2208               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2209               total_counts++;
2210               used_vertex = PETSC_TRUE;
2211             }
2212             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2213             k++;
2214           }
2215         }
2216       }
2217       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2218       n_vertices = total_counts;
2219     }
2220 
2221     /* edges and faces */
2222     total_counts_cc = total_counts;
2223     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2224       IS        used_is;
2225       PetscBool idxs_copied = PETSC_FALSE;
2226 
2227       if (ncc<n_ISForEdges) {
2228         used_is = ISForEdges[ncc];
2229         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2230       } else {
2231         used_is = ISForFaces[ncc-n_ISForEdges];
2232         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2233       }
2234       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2235 
2236       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2237       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2238       /* change of basis should not be performed on local periodic nodes */
2239       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2240       if (nnsp_has_cnst) {
2241         PetscScalar quad_value;
2242 
2243         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2244         idxs_copied = PETSC_TRUE;
2245 
2246         if (!pcbddc->use_nnsp_true) {
2247           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2248         } else {
2249           quad_value = 1.0;
2250         }
2251         for (j=0;j<size_of_constraint;j++) {
2252           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2253         }
2254         temp_constraints++;
2255         total_counts++;
2256       }
2257       for (k=0;k<nnsp_size;k++) {
2258         PetscReal real_value;
2259         PetscScalar *ptr_to_data;
2260 
2261         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2262         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2263         for (j=0;j<size_of_constraint;j++) {
2264           ptr_to_data[j] = array[is_indices[j]];
2265         }
2266         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2267         /* check if array is null on the connected component */
2268         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2269         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2270         if (real_value > 0.0) { /* keep indices and values */
2271           temp_constraints++;
2272           total_counts++;
2273           if (!idxs_copied) {
2274             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2275             idxs_copied = PETSC_TRUE;
2276           }
2277         }
2278       }
2279       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2280       valid_constraints = temp_constraints;
2281       if (!pcbddc->use_nnsp_true && temp_constraints) {
2282         if (temp_constraints == 1) { /* just normalize the constraint */
2283           PetscScalar norm,*ptr_to_data;
2284 
2285           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2286           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2287           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2288           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2289           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2290         } else { /* perform SVD */
2291           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2292           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2293 
2294 #if defined(PETSC_MISSING_LAPACK_GESVD)
2295           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2296              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2297              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2298                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2299                 from that computed using LAPACKgesvd
2300              -> This is due to a different computation of eigenvectors in LAPACKheev
2301              -> The quality of the POD-computed basis will be the same */
2302           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2303           /* Store upper triangular part of correlation matrix */
2304           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2305           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2306           for (j=0;j<temp_constraints;j++) {
2307             for (k=0;k<j+1;k++) {
2308               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2309             }
2310           }
2311           /* compute eigenvalues and eigenvectors of correlation matrix */
2312           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2313           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2314 #if !defined(PETSC_USE_COMPLEX)
2315           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2316 #else
2317           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2318 #endif
2319           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2320           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2321           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2322           j = 0;
2323           while (j < temp_constraints && singular_vals[j] < tol) j++;
2324           total_counts = total_counts-j;
2325           valid_constraints = temp_constraints-j;
2326           /* scale and copy POD basis into used quadrature memory */
2327           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2328           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2329           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2330           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2331           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2332           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2333           if (j<temp_constraints) {
2334             PetscInt ii;
2335             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2336             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2337             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2338             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2339             for (k=0;k<temp_constraints-j;k++) {
2340               for (ii=0;ii<size_of_constraint;ii++) {
2341                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2342               }
2343             }
2344           }
2345 #else  /* on missing GESVD */
2346           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2347           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2348           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2349           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2350 #if !defined(PETSC_USE_COMPLEX)
2351           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2352 #else
2353           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2354 #endif
2355           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2356           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2357           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2358           k = temp_constraints;
2359           if (k > size_of_constraint) k = size_of_constraint;
2360           j = 0;
2361           while (j < k && singular_vals[k-j-1] < tol) j++;
2362           valid_constraints = k-j;
2363           total_counts = total_counts-temp_constraints+valid_constraints;
2364 #endif /* on missing GESVD */
2365         }
2366       }
2367       /* update pointers information */
2368       if (valid_constraints) {
2369         constraints_n[total_counts_cc] = valid_constraints;
2370         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2371         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2372         /* set change_of_basis flag */
2373         if (boolforchange) {
2374           PetscBTSet(change_basis,total_counts_cc);
2375         }
2376         total_counts_cc++;
2377       }
2378     }
2379     /* free workspace */
2380     if (!skip_lapack) {
2381       ierr = PetscFree(work);CHKERRQ(ierr);
2382 #if defined(PETSC_USE_COMPLEX)
2383       ierr = PetscFree(rwork);CHKERRQ(ierr);
2384 #endif
2385       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2386 #if defined(PETSC_MISSING_LAPACK_GESVD)
2387       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2388       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2389 #endif
2390     }
2391     for (k=0;k<nnsp_size;k++) {
2392       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2393     }
2394     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2395     /* free index sets of faces, edges and vertices */
2396     for (i=0;i<n_ISForFaces;i++) {
2397       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2398     }
2399     if (n_ISForFaces) {
2400       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2401     }
2402     for (i=0;i<n_ISForEdges;i++) {
2403       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2404     }
2405     if (n_ISForEdges) {
2406       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2407     }
2408     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2409   } else {
2410     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2411 
2412     total_counts = 0;
2413     n_vertices = 0;
2414     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2415       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
2416     }
2417     max_constraints = 0;
2418     total_counts_cc = 0;
2419     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2420       total_counts += pcbddc->adaptive_constraints_n[i];
2421       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2422       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2423     }
2424     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2425     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2426     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2427     constraints_data = pcbddc->adaptive_constraints_data;
2428     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2429     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2430     total_counts_cc = 0;
2431     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2432       if (pcbddc->adaptive_constraints_n[i]) {
2433         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2434       }
2435     }
2436 #if 0
2437     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2438     for (i=0;i<total_counts_cc;i++) {
2439       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2440       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2441       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2442         printf(" %d",constraints_idxs[j]);
2443       }
2444       printf("\n");
2445       printf("number of cc: %d\n",constraints_n[i]);
2446     }
2447     for (i=0;i<n_vertices;i++) {
2448       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2449     }
2450     for (i=0;i<sub_schurs->n_subs;i++) {
2451       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2452     }
2453 #endif
2454 
2455     max_size_of_constraint = 0;
2456     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2457     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2458     /* Change of basis */
2459     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2460     if (pcbddc->use_change_of_basis) {
2461       for (i=0;i<sub_schurs->n_subs;i++) {
2462         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2463           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2464         }
2465       }
2466     }
2467   }
2468   pcbddc->local_primal_size = total_counts;
2469   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2470 
2471   /* map constraints_idxs in boundary numbering */
2472   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2473   if (i != constraints_idxs_ptr[total_counts_cc]) {
2474     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2475   }
2476 
2477   /* Create constraint matrix */
2478   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2479   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2480   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2481 
2482   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2483   /* determine if a QR strategy is needed for change of basis */
2484   qr_needed = PETSC_FALSE;
2485   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2486   total_primal_vertices=0;
2487   pcbddc->local_primal_size_cc = 0;
2488   for (i=0;i<total_counts_cc;i++) {
2489     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2490     if (size_of_constraint == 1) {
2491       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2492       pcbddc->local_primal_size_cc += 1;
2493     } else if (PetscBTLookup(change_basis,i)) {
2494       for (k=0;k<constraints_n[i];k++) {
2495         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2496       }
2497       pcbddc->local_primal_size_cc += constraints_n[i];
2498       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2499         PetscBTSet(qr_needed_idx,i);
2500         qr_needed = PETSC_TRUE;
2501       }
2502     } else {
2503       pcbddc->local_primal_size_cc += 1;
2504     }
2505   }
2506   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2507   pcbddc->n_vertices = total_primal_vertices;
2508   /* permute indices in order to have a sorted set of vertices */
2509   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2510 
2511   ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2512   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2513   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2514 
2515   /* nonzero structure of constraint matrix */
2516   /* and get reference dof for local constraints */
2517   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2518   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2519 
2520   j = total_primal_vertices;
2521   total_counts = total_primal_vertices;
2522   cum = total_primal_vertices;
2523   for (i=n_vertices;i<total_counts_cc;i++) {
2524     if (!PetscBTLookup(change_basis,i)) {
2525       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2526       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2527       cum++;
2528       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2529       for (k=0;k<constraints_n[i];k++) {
2530         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2531         nnz[j+k] = size_of_constraint;
2532       }
2533       j += constraints_n[i];
2534     }
2535   }
2536   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2537   ierr = PetscFree(nnz);CHKERRQ(ierr);
2538 
2539   /* set values in constraint matrix */
2540   for (i=0;i<total_primal_vertices;i++) {
2541     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2542   }
2543   total_counts = total_primal_vertices;
2544   for (i=n_vertices;i<total_counts_cc;i++) {
2545     if (!PetscBTLookup(change_basis,i)) {
2546       PetscInt *cols;
2547 
2548       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2549       cols = constraints_idxs+constraints_idxs_ptr[i];
2550       for (k=0;k<constraints_n[i];k++) {
2551         PetscInt    row = total_counts+k;
2552         PetscScalar *vals;
2553 
2554         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2555         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2556       }
2557       total_counts += constraints_n[i];
2558     }
2559   }
2560   /* assembling */
2561   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2562   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2563 
2564   /*
2565   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2566   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2567   */
2568   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2569   if (pcbddc->use_change_of_basis) {
2570     /* dual and primal dofs on a single cc */
2571     PetscInt     dual_dofs,primal_dofs;
2572     /* working stuff for GEQRF */
2573     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2574     PetscBLASInt lqr_work;
2575     /* working stuff for UNGQR */
2576     PetscScalar  *gqr_work,lgqr_work_t;
2577     PetscBLASInt lgqr_work;
2578     /* working stuff for TRTRS */
2579     PetscScalar  *trs_rhs;
2580     PetscBLASInt Blas_NRHS;
2581     /* pointers for values insertion into change of basis matrix */
2582     PetscInt     *start_rows,*start_cols;
2583     PetscScalar  *start_vals;
2584     /* working stuff for values insertion */
2585     PetscBT      is_primal;
2586     PetscInt     *aux_primal_numbering_B;
2587     /* matrix sizes */
2588     PetscInt     global_size,local_size;
2589     /* temporary change of basis */
2590     Mat          localChangeOfBasisMatrix;
2591     /* extra space for debugging */
2592     PetscScalar  *dbg_work;
2593 
2594     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2595     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2596     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2597     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2598     /* nonzeros for local mat */
2599     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2600     for (i=0;i<pcis->n;i++) nnz[i]=1;
2601     for (i=n_vertices;i<total_counts_cc;i++) {
2602       if (PetscBTLookup(change_basis,i)) {
2603         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2604         if (PetscBTLookup(qr_needed_idx,i)) {
2605           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2606         } else {
2607           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2608           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2609         }
2610       }
2611     }
2612     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2613     ierr = PetscFree(nnz);CHKERRQ(ierr);
2614     /* Set initial identity in the matrix */
2615     for (i=0;i<pcis->n;i++) {
2616       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2617     }
2618 
2619     if (pcbddc->dbg_flag) {
2620       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2621       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2622     }
2623 
2624 
2625     /* Now we loop on the constraints which need a change of basis */
2626     /*
2627        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2628        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2629 
2630        Basic blocks of change of basis matrix T computed by
2631 
2632           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2633 
2634             | 1        0   ...        0         s_1/S |
2635             | 0        1   ...        0         s_2/S |
2636             |              ...                        |
2637             | 0        ...            1     s_{n-1}/S |
2638             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2639 
2640             with S = \sum_{i=1}^n s_i^2
2641             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2642                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2643 
2644           - QR decomposition of constraints otherwise
2645     */
2646     if (qr_needed) {
2647       /* space to store Q */
2648       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2649       /* first we issue queries for optimal work */
2650       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2651       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2652       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2653       lqr_work = -1;
2654       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2655       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2656       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2657       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2658       lgqr_work = -1;
2659       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2660       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2661       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2662       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2663       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2664       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2665       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2666       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2667       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2668       /* array to store scaling factors for reflectors */
2669       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2670       /* array to store rhs and solution of triangular solver */
2671       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2672       /* allocating workspace for check */
2673       if (pcbddc->dbg_flag) {
2674         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2675       }
2676     }
2677     /* array to store whether a node is primal or not */
2678     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2679     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2680     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2681     if (i != total_primal_vertices) {
2682       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2683     }
2684     for (i=0;i<total_primal_vertices;i++) {
2685       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2686     }
2687     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2688 
2689     /* loop on constraints and see whether or not they need a change of basis and compute it */
2690     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2691       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2692       if (PetscBTLookup(change_basis,total_counts)) {
2693         /* get constraint info */
2694         primal_dofs = constraints_n[total_counts];
2695         dual_dofs = size_of_constraint-primal_dofs;
2696 
2697         if (pcbddc->dbg_flag) {
2698           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
2699         }
2700 
2701         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2702 
2703           /* copy quadrature constraints for change of basis check */
2704           if (pcbddc->dbg_flag) {
2705             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2706           }
2707           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2708           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2709 
2710           /* compute QR decomposition of constraints */
2711           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2712           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2713           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2714           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2715           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2716           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2717           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2718 
2719           /* explictly compute R^-T */
2720           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2721           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2722           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2723           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2724           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2725           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2726           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2727           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2728           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2729           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2730 
2731           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2732           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2733           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2734           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2735           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2736           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2737           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2738           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2739           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2740 
2741           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2742              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2743              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2744           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2745           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2746           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2747           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2748           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2749           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2750           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2751           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2752           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2753           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2754 
2755           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2756           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2757           /* insert cols for primal dofs */
2758           for (j=0;j<primal_dofs;j++) {
2759             start_vals = &qr_basis[j*size_of_constraint];
2760             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2761             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2762           }
2763           /* insert cols for dual dofs */
2764           for (j=0,k=0;j<dual_dofs;k++) {
2765             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2766               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2767               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2768               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2769               j++;
2770             }
2771           }
2772 
2773           /* check change of basis */
2774           if (pcbddc->dbg_flag) {
2775             PetscInt   ii,jj;
2776             PetscBool valid_qr=PETSC_TRUE;
2777             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2778             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2779             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2780             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2781             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2782             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2783             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2784             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2785             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2786             for (jj=0;jj<size_of_constraint;jj++) {
2787               for (ii=0;ii<primal_dofs;ii++) {
2788                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2789                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2790               }
2791             }
2792             if (!valid_qr) {
2793               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2794               for (jj=0;jj<size_of_constraint;jj++) {
2795                 for (ii=0;ii<primal_dofs;ii++) {
2796                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2797                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2798                   }
2799                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2800                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2801                   }
2802                 }
2803               }
2804             } else {
2805               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2806             }
2807           }
2808         } else { /* simple transformation block */
2809           PetscInt    row,col;
2810           PetscScalar val,norm;
2811 
2812           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2813           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2814           for (j=0;j<size_of_constraint;j++) {
2815             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2816             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2817             if (!PetscBTLookup(is_primal,row_B)) {
2818               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2819               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2820               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2821             } else {
2822               for (k=0;k<size_of_constraint;k++) {
2823                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2824                 if (row != col) {
2825                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2826                 } else {
2827                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2828                 }
2829                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2830               }
2831             }
2832           }
2833           if (pcbddc->dbg_flag) {
2834             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2835           }
2836         }
2837       } else {
2838         if (pcbddc->dbg_flag) {
2839           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
2840         }
2841       }
2842     }
2843 
2844     /* free workspace */
2845     if (qr_needed) {
2846       if (pcbddc->dbg_flag) {
2847         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2848       }
2849       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2850       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2851       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2852       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2853       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2854     }
2855     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2856     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2857     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2858 
2859     /* assembling of global change of variable */
2860     {
2861       Mat      tmat;
2862       PetscInt bs;
2863 
2864       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2865       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2866       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2867       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2868       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2869       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2870       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2871       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2872       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2873       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2874       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2875       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2876       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2877       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2878       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2879       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2880       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2881       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2882     }
2883     /* check */
2884     if (pcbddc->dbg_flag) {
2885       PetscReal error;
2886       Vec       x,x_change;
2887 
2888       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2889       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2890       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2891       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2892       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2893       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2894       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2895       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2896       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2897       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2898       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2899       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2900       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2901       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2902       ierr = VecDestroy(&x);CHKERRQ(ierr);
2903       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2904     }
2905 
2906     /* adapt sub_schurs computed (if any) */
2907     if (pcbddc->use_deluxe_scaling) {
2908       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2909       if (sub_schurs->S_Ej_all) {
2910         Mat S_new,tmat;
2911         IS is_all_N;
2912 
2913         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2914         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2915         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2916         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2917         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2918         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2919         sub_schurs->S_Ej_all = S_new;
2920         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2921         if (sub_schurs->sum_S_Ej_all) {
2922           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2923           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2924           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2925           sub_schurs->sum_S_Ej_all = S_new;
2926           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2927         }
2928         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2929       }
2930     }
2931     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2932   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2933     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2934     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2935   }
2936 
2937   /* set up change of basis context */
2938   if (pcbddc->ChangeOfBasisMatrix) {
2939     PCBDDCChange_ctx change_ctx;
2940 
2941     if (!pcbddc->new_global_mat) {
2942       PetscInt global_size,local_size;
2943 
2944       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2945       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2946       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2947       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2948       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2949       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2950       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2951       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2952       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2953     } else {
2954       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2955       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2956       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2957     }
2958     if (!pcbddc->user_ChangeOfBasisMatrix) {
2959       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2960       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2961     } else {
2962       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2963       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2964     }
2965     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2966     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2967     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2968     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2969   }
2970 
2971   /* check if a new primal space has been introduced */
2972   pcbddc->new_primal_space_local = PETSC_TRUE;
2973   if (olocal_primal_size == pcbddc->local_primal_size) {
2974     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2975     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2976     if (!pcbddc->new_primal_space_local) {
2977       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2978       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2979     }
2980   }
2981   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
2982   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2983   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2984 
2985   /* flush dbg viewer */
2986   if (pcbddc->dbg_flag) {
2987     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2988   }
2989 
2990   /* free workspace */
2991   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2992   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2993   if (!pcbddc->adaptive_selection) {
2994     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
2995     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
2996   } else {
2997     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
2998                       pcbddc->adaptive_constraints_idxs_ptr,
2999                       pcbddc->adaptive_constraints_data_ptr,
3000                       pcbddc->adaptive_constraints_idxs,
3001                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
3002     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
3003     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
3004   }
3005   PetscFunctionReturn(0);
3006 }
3007 
3008 #undef __FUNCT__
3009 #define __FUNCT__ "PCBDDCAnalyzeInterface"
3010 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3011 {
3012   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
3013   PC_IS       *pcis = (PC_IS*)pc->data;
3014   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
3015   PetscInt    ierr,i,vertex_size,N;
3016   PetscViewer viewer=pcbddc->dbg_viewer;
3017 
3018   PetscFunctionBegin;
3019   /* Reset previously computed graph */
3020   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3021   /* Init local Graph struct */
3022   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3023   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr);
3024 
3025   /* Check validity of the csr graph passed in by the user */
3026   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3027     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
3028   }
3029 
3030   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3031   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3032     PetscInt  *xadj,*adjncy;
3033     PetscInt  nvtxs;
3034     PetscBool flg_row=PETSC_FALSE;
3035 
3036     if (pcbddc->use_local_adj) {
3037 
3038       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3039       if (flg_row) {
3040         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3041         pcbddc->computed_rowadj = PETSC_TRUE;
3042       }
3043       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3044     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3045       IS                     is_dummy;
3046       ISLocalToGlobalMapping l2gmap_dummy;
3047       PetscInt               j,sum;
3048       PetscInt               *cxadj,*cadjncy;
3049       const PetscInt         *idxs;
3050       PCBDDCGraph            graph;
3051       PetscBT                is_on_boundary;
3052 
3053       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3054       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3055       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3056       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3057       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3058       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3059       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3060       if (flg_row) {
3061         graph->xadj = xadj;
3062         graph->adjncy = adjncy;
3063       }
3064       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3065       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3066       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3067 
3068       if (pcbddc->dbg_flag) {
3069         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3070         for (i=0;i<graph->ncc;i++) {
3071           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3072         }
3073       }
3074 
3075       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3076       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3077       for (i=0;i<pcis->n_B;i++) {
3078         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3079       }
3080       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3081 
3082       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3083       sum = 0;
3084       for (i=0;i<graph->ncc;i++) {
3085         PetscInt sizecc = 0;
3086         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3087           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3088             sizecc++;
3089           }
3090         }
3091         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3092           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3093             cxadj[graph->queue[j]] = sizecc;
3094           }
3095         }
3096         sum += sizecc*sizecc;
3097       }
3098       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3099       sum = 0;
3100       for (i=0;i<pcis->n;i++) {
3101         PetscInt temp = cxadj[i];
3102         cxadj[i] = sum;
3103         sum += temp;
3104       }
3105       cxadj[pcis->n] = sum;
3106       for (i=0;i<graph->ncc;i++) {
3107         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3108           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3109             PetscInt k,sizecc = 0;
3110             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3111               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3112                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3113                 sizecc++;
3114               }
3115             }
3116           }
3117         }
3118       }
3119       if (sum) {
3120         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3121       } else {
3122         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3123         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3124       }
3125       graph->xadj = 0;
3126       graph->adjncy = 0;
3127       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3128       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3129     }
3130   }
3131   if (pcbddc->dbg_flag) {
3132     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3133   }
3134 
3135   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3136   vertex_size = 1;
3137   if (pcbddc->user_provided_isfordofs) {
3138     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3139       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3140       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3141         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3142         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3143       }
3144       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3145       pcbddc->n_ISForDofs = 0;
3146       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3147     }
3148     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3149     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3150   } else {
3151     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3152       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3153       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3154       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3155         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3156       }
3157     }
3158   }
3159 
3160   /* Setup of Graph */
3161   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3162     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3163   }
3164   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3165     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3166   }
3167   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3168 
3169   /* Graph's connected components analysis */
3170   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3171 
3172   /* print some info to stdout */
3173   if (pcbddc->dbg_flag) {
3174     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3175   }
3176 
3177   /* mark topography has done */
3178   pcbddc->recompute_topography = PETSC_FALSE;
3179   PetscFunctionReturn(0);
3180 }
3181 
3182 /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3183 #undef __FUNCT__
3184 #define __FUNCT__ "PCBDDCSubsetNumbering"
3185 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3186 {
3187   PetscSF        sf;
3188   PetscLayout    map;
3189   const PetscInt *idxs;
3190   PetscInt       *leaf_data,*root_data,*gidxs;
3191   PetscInt       N,n,i,lbounds[2],gbounds[2],Nl;
3192   PetscInt       n_n,nlocals,start,first_index;
3193   PetscMPIInt    commsize;
3194   PetscBool      first_found;
3195   PetscErrorCode ierr;
3196 
3197   PetscFunctionBegin;
3198   ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr);
3199   if (subset_mult) {
3200     PetscCheckSameComm(subset,1,subset_mult,2);
3201     ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr);
3202     if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3203   }
3204   /* create workspace layout for computing global indices of subset */
3205   ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr);
3206   lbounds[0] = lbounds[1] = 0;
3207   for (i=0;i<n;i++) {
3208     if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
3209     else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
3210   }
3211   lbounds[0] = -lbounds[0];
3212   ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3213   gbounds[0] = -gbounds[0];
3214   N = gbounds[1] - gbounds[0] + 1;
3215   ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr);
3216   ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
3217   ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr);
3218   ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
3219   ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr);
3220 
3221   /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
3222   ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr);
3223   if (subset_mult) {
3224     const PetscInt* idxs_mult;
3225 
3226     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3227     ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr);
3228     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3229   } else {
3230     for (i=0;i<n;i++) leaf_data[i] = 1;
3231   }
3232   /* local size of new subset */
3233   n_n = 0;
3234   for (i=0;i<n;i++) n_n += leaf_data[i];
3235 
3236   /* global indexes in layout */
3237   ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */
3238   for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
3239   ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr);
3240   ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr);
3241   ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr);
3242   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
3243 
3244   /* reduce from leaves to roots */
3245   ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr);
3246   ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3247   ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3248 
3249   /* count indexes in local part of layout */
3250   nlocals = 0;
3251   first_index = -1;
3252   first_found = PETSC_FALSE;
3253   for (i=0;i<Nl;i++) {
3254     if (!first_found && root_data[i]) {
3255       first_found = PETSC_TRUE;
3256       first_index = i;
3257     }
3258     nlocals += root_data[i];
3259   }
3260 
3261   /* cumulative of number of indexes and size of subset without holes */
3262 #if defined(PETSC_HAVE_MPI_EXSCAN)
3263   start = 0;
3264   ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3265 #else
3266   ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3267   start = start-nlocals;
3268 #endif
3269 
3270   if (N_n) { /* compute total size of new subset if requested */
3271     *N_n = start + nlocals;
3272     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr);
3273     ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3274   }
3275 
3276   /* adapt root data with cumulative */
3277   if (first_found) {
3278     PetscInt old_index;
3279 
3280     root_data[first_index] += start;
3281     old_index = first_index;
3282     for (i=first_index+1;i<Nl;i++) {
3283       if (root_data[i]) {
3284         root_data[i] += root_data[old_index];
3285         old_index = i;
3286       }
3287     }
3288   }
3289 
3290   /* from roots to leaves */
3291   ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3292   ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3293   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
3294 
3295   /* create new IS with global indexes without holes */
3296   if (subset_mult) {
3297     const PetscInt* idxs_mult;
3298     PetscInt        cum;
3299 
3300     cum = 0;
3301     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3302     for (i=0;i<n;i++) {
3303       PetscInt j;
3304       for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
3305     }
3306     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3307   } else {
3308     for (i=0;i<n;i++) {
3309       gidxs[i] = leaf_data[i]-1;
3310     }
3311   }
3312   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr);
3313   ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr);
3314   PetscFunctionReturn(0);
3315 }
3316 
3317 #undef __FUNCT__
3318 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3319 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3320 {
3321   PetscInt       i,j;
3322   PetscScalar    *alphas;
3323   PetscErrorCode ierr;
3324 
3325   PetscFunctionBegin;
3326   /* this implements stabilized Gram-Schmidt */
3327   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3328   for (i=0;i<n;i++) {
3329     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3330     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3331     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3332   }
3333   ierr = PetscFree(alphas);CHKERRQ(ierr);
3334   PetscFunctionReturn(0);
3335 }
3336 
3337 #undef __FUNCT__
3338 #define __FUNCT__ "MatISGetSubassemblingPattern"
3339 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
3340 {
3341   Mat_IS         *matis;
3342   IS             ranks_send_to;
3343   PetscInt       n_neighs,*neighs,*n_shared,**shared;
3344   PetscMPIInt    size,rank,color;
3345   PetscInt       *xadj,*adjncy;
3346   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
3347   PetscInt       i,local_size,threshold=0;
3348   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3349   PetscSubcomm   subcomm;
3350   PetscErrorCode ierr;
3351 
3352   PetscFunctionBegin;
3353   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3354   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3355   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3356 
3357   /* Get info on mapping */
3358   matis = (Mat_IS*)(mat->data);
3359   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3360   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3361 
3362   /* build local CSR graph of subdomains' connectivity */
3363   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3364   xadj[0] = 0;
3365   xadj[1] = PetscMax(n_neighs-1,0);
3366   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3367   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3368 
3369   if (threshold) {
3370     PetscInt xadj_count = 0;
3371     for (i=1;i<n_neighs;i++) {
3372       if (n_shared[i] > threshold) {
3373         adjncy[xadj_count] = neighs[i];
3374         adjncy_wgt[xadj_count] = n_shared[i];
3375         xadj_count++;
3376       }
3377     }
3378     xadj[1] = xadj_count;
3379   } else {
3380     if (xadj[1]) {
3381       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3382       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3383     }
3384   }
3385   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3386   if (use_square) {
3387     for (i=0;i<xadj[1];i++) {
3388       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3389     }
3390   }
3391   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3392 
3393   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3394 
3395   /*
3396     Restrict work on active processes only.
3397   */
3398   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3399   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3400   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3401   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3402   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3403   if (color) {
3404     ierr = PetscFree(xadj);CHKERRQ(ierr);
3405     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3406     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3407   } else {
3408     Mat             subdomain_adj;
3409     IS              new_ranks,new_ranks_contig;
3410     MatPartitioning partitioner;
3411     PetscInt        prank,rstart=0,rend=0;
3412     PetscInt        *is_indices,*oldranks;
3413     PetscBool       aggregate;
3414 
3415     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3416     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3417     prank = rank;
3418     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3419     /*
3420     for (i=0;i<size;i++) {
3421       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3422     }
3423     */
3424     for (i=0;i<xadj[1];i++) {
3425       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3426     }
3427     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3428     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
3429     if (aggregate) {
3430       PetscInt    lrows,row,ncols,*cols;
3431       PetscMPIInt nrank;
3432       PetscScalar *vals;
3433 
3434       ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr);
3435       lrows = 0;
3436       if (nrank<redprocs) {
3437         lrows = size/redprocs;
3438         if (nrank<size%redprocs) lrows++;
3439       }
3440       ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr);
3441       ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr);
3442       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3443       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3444       row = nrank;
3445       ncols = xadj[1]-xadj[0];
3446       cols = adjncy;
3447       ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr);
3448       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
3449       ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3450       ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3451       ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3452       ierr = PetscFree(xadj);CHKERRQ(ierr);
3453       ierr = PetscFree(adjncy);CHKERRQ(ierr);
3454       ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3455       ierr = PetscFree(vals);CHKERRQ(ierr);
3456     } else {
3457       ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3458     }
3459     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3460 
3461     /* Partition */
3462     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3463     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3464     if (use_vwgt) {
3465       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3466       v_wgt[0] = local_size;
3467       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3468     }
3469     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3470     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3471     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3472     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3473     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3474 
3475     /* renumber new_ranks to avoid "holes" in new set of processors */
3476     ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr);
3477     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3478     ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3479     if (!redprocs) {
3480       ranks_send_to_idx[0] = oldranks[is_indices[0]];
3481     } else {
3482       PetscInt    idxs[1];
3483       PetscMPIInt tag;
3484       MPI_Request *reqs;
3485 
3486       ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr);
3487       ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr);
3488       for (i=rstart;i<rend;i++) {
3489         ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr);
3490       }
3491       ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr);
3492       ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3493       ierr = PetscFree(reqs);CHKERRQ(ierr);
3494       ranks_send_to_idx[0] = oldranks[idxs[0]];
3495     }
3496     ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3497     /* clean up */
3498     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3499     ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr);
3500     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3501     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3502   }
3503   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3504 
3505   /* assemble parallel IS for sends */
3506   i = 1;
3507   if (color) i=0;
3508   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3509   /* get back IS */
3510   *is_sends = ranks_send_to;
3511   PetscFunctionReturn(0);
3512 }
3513 
3514 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3515 
3516 #undef __FUNCT__
3517 #define __FUNCT__ "MatISSubassemble"
3518 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3519 {
3520   Mat                    local_mat;
3521   Mat_IS                 *matis;
3522   IS                     is_sends_internal;
3523   PetscInt               rows,cols,new_local_rows;
3524   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3525   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3526   ISLocalToGlobalMapping l2gmap;
3527   PetscInt*              l2gmap_indices;
3528   const PetscInt*        is_indices;
3529   MatType                new_local_type;
3530   /* buffers */
3531   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3532   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3533   PetscInt               *recv_buffer_idxs_local;
3534   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3535   /* MPI */
3536   MPI_Comm               comm,comm_n;
3537   PetscSubcomm           subcomm;
3538   PetscMPIInt            n_sends,n_recvs,commsize;
3539   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3540   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3541   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3542   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3543   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3544   PetscErrorCode         ierr;
3545 
3546   PetscFunctionBegin;
3547   /* TODO: add missing checks */
3548   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3549   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3550   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3551   PetscValidLogicalCollectiveInt(mat,nis,7);
3552   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3553   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3554   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3555   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3556   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3557   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3558   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3559   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3560     PetscInt mrows,mcols,mnrows,mncols;
3561     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3562     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3563     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3564     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3565     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3566     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3567   }
3568   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3569   PetscValidLogicalCollectiveInt(mat,bs,0);
3570   /* prepare IS for sending if not provided */
3571   if (!is_sends) {
3572     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3573     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr);
3574   } else {
3575     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3576     is_sends_internal = is_sends;
3577   }
3578 
3579   /* get pointer of MATIS data */
3580   matis = (Mat_IS*)mat->data;
3581 
3582   /* get comm */
3583   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3584 
3585   /* compute number of sends */
3586   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3587   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3588 
3589   /* compute number of receives */
3590   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3591   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3592   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3593   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3594   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3595   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3596   ierr = PetscFree(iflags);CHKERRQ(ierr);
3597 
3598   /* restrict comm if requested */
3599   subcomm = 0;
3600   destroy_mat = PETSC_FALSE;
3601   if (restrict_comm) {
3602     PetscMPIInt color,subcommsize;
3603 
3604     color = 0;
3605     if (restrict_full) {
3606       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
3607     } else {
3608       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
3609     }
3610     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3611     subcommsize = commsize - subcommsize;
3612     /* check if reuse has been requested */
3613     if (reuse == MAT_REUSE_MATRIX) {
3614       if (*mat_n) {
3615         PetscMPIInt subcommsize2;
3616         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3617         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3618         comm_n = PetscObjectComm((PetscObject)*mat_n);
3619       } else {
3620         comm_n = PETSC_COMM_SELF;
3621       }
3622     } else { /* MAT_INITIAL_MATRIX */
3623       PetscMPIInt rank;
3624 
3625       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3626       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3627       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3628       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3629       comm_n = PetscSubcommChild(subcomm);
3630     }
3631     /* flag to destroy *mat_n if not significative */
3632     if (color) destroy_mat = PETSC_TRUE;
3633   } else {
3634     comm_n = comm;
3635   }
3636 
3637   /* prepare send/receive buffers */
3638   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3639   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3640   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3641   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3642   if (nis) {
3643     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3644   }
3645 
3646   /* Get data from local matrices */
3647   if (!isdense) {
3648     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3649     /* TODO: See below some guidelines on how to prepare the local buffers */
3650     /*
3651        send_buffer_vals should contain the raw values of the local matrix
3652        send_buffer_idxs should contain:
3653        - MatType_PRIVATE type
3654        - PetscInt        size_of_l2gmap
3655        - PetscInt        global_row_indices[size_of_l2gmap]
3656        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3657     */
3658   } else {
3659     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3660     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3661     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3662     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3663     send_buffer_idxs[1] = i;
3664     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3665     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3666     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3667     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3668     for (i=0;i<n_sends;i++) {
3669       ilengths_vals[is_indices[i]] = len*len;
3670       ilengths_idxs[is_indices[i]] = len+2;
3671     }
3672   }
3673   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3674   /* additional is (if any) */
3675   if (nis) {
3676     PetscMPIInt psum;
3677     PetscInt j;
3678     for (j=0,psum=0;j<nis;j++) {
3679       PetscInt plen;
3680       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3681       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3682       psum += len+1; /* indices + lenght */
3683     }
3684     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3685     for (j=0,psum=0;j<nis;j++) {
3686       PetscInt plen;
3687       const PetscInt *is_array_idxs;
3688       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3689       send_buffer_idxs_is[psum] = plen;
3690       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3691       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3692       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3693       psum += plen+1; /* indices + lenght */
3694     }
3695     for (i=0;i<n_sends;i++) {
3696       ilengths_idxs_is[is_indices[i]] = psum;
3697     }
3698     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3699   }
3700 
3701   buf_size_idxs = 0;
3702   buf_size_vals = 0;
3703   buf_size_idxs_is = 0;
3704   for (i=0;i<n_recvs;i++) {
3705     buf_size_idxs += (PetscInt)olengths_idxs[i];
3706     buf_size_vals += (PetscInt)olengths_vals[i];
3707     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3708   }
3709   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3710   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3711   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3712 
3713   /* get new tags for clean communications */
3714   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3715   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3716   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3717 
3718   /* allocate for requests */
3719   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3720   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3721   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3722   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3723   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3724   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3725 
3726   /* communications */
3727   ptr_idxs = recv_buffer_idxs;
3728   ptr_vals = recv_buffer_vals;
3729   ptr_idxs_is = recv_buffer_idxs_is;
3730   for (i=0;i<n_recvs;i++) {
3731     source_dest = onodes[i];
3732     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3733     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3734     ptr_idxs += olengths_idxs[i];
3735     ptr_vals += olengths_vals[i];
3736     if (nis) {
3737       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3738       ptr_idxs_is += olengths_idxs_is[i];
3739     }
3740   }
3741   for (i=0;i<n_sends;i++) {
3742     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3743     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3744     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3745     if (nis) {
3746       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3747     }
3748   }
3749   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3750   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3751 
3752   /* assemble new l2g map */
3753   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3754   ptr_idxs = recv_buffer_idxs;
3755   new_local_rows = 0;
3756   for (i=0;i<n_recvs;i++) {
3757     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3758     ptr_idxs += olengths_idxs[i];
3759   }
3760   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3761   ptr_idxs = recv_buffer_idxs;
3762   new_local_rows = 0;
3763   for (i=0;i<n_recvs;i++) {
3764     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3765     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3766     ptr_idxs += olengths_idxs[i];
3767   }
3768   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3769   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3770   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3771 
3772   /* infer new local matrix type from received local matrices type */
3773   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3774   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3775   if (n_recvs) {
3776     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3777     ptr_idxs = recv_buffer_idxs;
3778     for (i=0;i<n_recvs;i++) {
3779       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3780         new_local_type_private = MATAIJ_PRIVATE;
3781         break;
3782       }
3783       ptr_idxs += olengths_idxs[i];
3784     }
3785     switch (new_local_type_private) {
3786       case MATDENSE_PRIVATE:
3787         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3788           new_local_type = MATSEQAIJ;
3789           bs = 1;
3790         } else { /* if I receive only 1 dense matrix */
3791           new_local_type = MATSEQDENSE;
3792           bs = 1;
3793         }
3794         break;
3795       case MATAIJ_PRIVATE:
3796         new_local_type = MATSEQAIJ;
3797         bs = 1;
3798         break;
3799       case MATBAIJ_PRIVATE:
3800         new_local_type = MATSEQBAIJ;
3801         break;
3802       case MATSBAIJ_PRIVATE:
3803         new_local_type = MATSEQSBAIJ;
3804         break;
3805       default:
3806         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3807         break;
3808     }
3809   } else { /* by default, new_local_type is seqdense */
3810     new_local_type = MATSEQDENSE;
3811     bs = 1;
3812   }
3813 
3814   /* create MATIS object if needed */
3815   if (reuse == MAT_INITIAL_MATRIX) {
3816     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3817     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3818   } else {
3819     /* it also destroys the local matrices */
3820     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3821   }
3822   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3823   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3824 
3825   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3826 
3827   /* Global to local map of received indices */
3828   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3829   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3830   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3831 
3832   /* restore attributes -> type of incoming data and its size */
3833   buf_size_idxs = 0;
3834   for (i=0;i<n_recvs;i++) {
3835     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3836     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3837     buf_size_idxs += (PetscInt)olengths_idxs[i];
3838   }
3839   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3840 
3841   /* set preallocation */
3842   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3843   if (!newisdense) {
3844     PetscInt *new_local_nnz=0;
3845 
3846     ptr_vals = recv_buffer_vals;
3847     ptr_idxs = recv_buffer_idxs_local;
3848     if (n_recvs) {
3849       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3850     }
3851     for (i=0;i<n_recvs;i++) {
3852       PetscInt j;
3853       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3854         for (j=0;j<*(ptr_idxs+1);j++) {
3855           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3856         }
3857       } else {
3858         /* TODO */
3859       }
3860       ptr_idxs += olengths_idxs[i];
3861     }
3862     if (new_local_nnz) {
3863       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3864       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3865       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3866       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3867       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3868       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3869     } else {
3870       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3871     }
3872     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3873   } else {
3874     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3875   }
3876 
3877   /* set values */
3878   ptr_vals = recv_buffer_vals;
3879   ptr_idxs = recv_buffer_idxs_local;
3880   for (i=0;i<n_recvs;i++) {
3881     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3882       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3883       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3884       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3885       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3886       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3887     } else {
3888       /* TODO */
3889     }
3890     ptr_idxs += olengths_idxs[i];
3891     ptr_vals += olengths_vals[i];
3892   }
3893   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3894   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3895   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3896   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3897   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3898   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3899 
3900 #if 0
3901   if (!restrict_comm) { /* check */
3902     Vec       lvec,rvec;
3903     PetscReal infty_error;
3904 
3905     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3906     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3907     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3908     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3909     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3910     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3911     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3912     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3913     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3914   }
3915 #endif
3916 
3917   /* assemble new additional is (if any) */
3918   if (nis) {
3919     PetscInt **temp_idxs,*count_is,j,psum;
3920 
3921     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3922     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3923     ptr_idxs = recv_buffer_idxs_is;
3924     psum = 0;
3925     for (i=0;i<n_recvs;i++) {
3926       for (j=0;j<nis;j++) {
3927         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3928         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3929         psum += plen;
3930         ptr_idxs += plen+1; /* shift pointer to received data */
3931       }
3932     }
3933     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3934     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3935     for (i=1;i<nis;i++) {
3936       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3937     }
3938     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3939     ptr_idxs = recv_buffer_idxs_is;
3940     for (i=0;i<n_recvs;i++) {
3941       for (j=0;j<nis;j++) {
3942         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3943         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3944         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3945         ptr_idxs += plen+1; /* shift pointer to received data */
3946       }
3947     }
3948     for (i=0;i<nis;i++) {
3949       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3950       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3951       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3952     }
3953     ierr = PetscFree(count_is);CHKERRQ(ierr);
3954     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3955     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3956   }
3957   /* free workspace */
3958   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3959   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3960   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3961   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3962   if (isdense) {
3963     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3964     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3965   } else {
3966     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3967   }
3968   if (nis) {
3969     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3970     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3971   }
3972   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3973   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3974   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3975   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3976   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3977   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3978   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3979   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3980   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3981   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3982   ierr = PetscFree(onodes);CHKERRQ(ierr);
3983   if (nis) {
3984     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3985     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3986     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3987   }
3988   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3989   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3990     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3991     for (i=0;i<nis;i++) {
3992       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3993     }
3994     *mat_n = NULL;
3995   }
3996   PetscFunctionReturn(0);
3997 }
3998 
3999 /* temporary hack into ksp private data structure */
4000 #include <petsc/private/kspimpl.h>
4001 
4002 #undef __FUNCT__
4003 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
4004 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
4005 {
4006   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
4007   PC_IS                  *pcis = (PC_IS*)pc->data;
4008   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
4009   MatNullSpace           CoarseNullSpace=NULL;
4010   ISLocalToGlobalMapping coarse_islg;
4011   IS                     coarse_is,*isarray;
4012   PetscInt               i,im_active=-1,active_procs=-1;
4013   PetscInt               nis,nisdofs,nisneu;
4014   PC                     pc_temp;
4015   PCType                 coarse_pc_type;
4016   KSPType                coarse_ksp_type;
4017   PetscBool              multilevel_requested,multilevel_allowed;
4018   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
4019   Mat                    t_coarse_mat_is;
4020   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
4021   PetscMPIInt            all_procs;
4022   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
4023   PetscBool              compute_vecs = PETSC_FALSE;
4024   PetscScalar            *array;
4025   PetscErrorCode         ierr;
4026 
4027   PetscFunctionBegin;
4028   /* Assign global numbering to coarse dofs */
4029   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
4030     PetscInt ocoarse_size;
4031     compute_vecs = PETSC_TRUE;
4032     ocoarse_size = pcbddc->coarse_size;
4033     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
4034     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
4035     /* see if we can avoid some work */
4036     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4037       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4038       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4039         PC        pc;
4040         PetscBool isbddc;
4041 
4042         /* temporary workaround since PCBDDC does not have a reset method so far */
4043         ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr);
4044         ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr);
4045         if (isbddc) {
4046           ierr = PCDestroy(&pc);CHKERRQ(ierr);
4047         }
4048         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
4049         coarse_reuse = PETSC_FALSE;
4050       } else { /* we can safely reuse already computed coarse matrix */
4051         coarse_reuse = PETSC_TRUE;
4052       }
4053     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4054       coarse_reuse = PETSC_FALSE;
4055     }
4056     /* reset any subassembling information */
4057     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4058     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4059   } else { /* primal space is unchanged, so we can reuse coarse matrix */
4060     coarse_reuse = PETSC_TRUE;
4061   }
4062 
4063   /* count "active" (i.e. with positive local size) and "void" processes */
4064   im_active = !!(pcis->n);
4065   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4066   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
4067   void_procs = all_procs-active_procs;
4068   csin_type_simple = PETSC_TRUE;
4069   redist = PETSC_FALSE;
4070   if (pcbddc->current_level && void_procs) {
4071     csin_ml = PETSC_TRUE;
4072     ncoarse_ml = void_procs;
4073     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4074     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4075       csin_ds = PETSC_TRUE;
4076       ncoarse_ds = pcbddc->redistribute_coarse;
4077       redist = PETSC_TRUE;
4078     } else {
4079       csin_ds = PETSC_TRUE;
4080       ncoarse_ds = active_procs;
4081       redist = PETSC_TRUE;
4082     }
4083   } else {
4084     csin_ml = PETSC_FALSE;
4085     ncoarse_ml = all_procs;
4086     if (void_procs) {
4087       csin_ds = PETSC_TRUE;
4088       ncoarse_ds = void_procs;
4089       csin_type_simple = PETSC_FALSE;
4090     } else {
4091       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4092         csin_ds = PETSC_TRUE;
4093         ncoarse_ds = pcbddc->redistribute_coarse;
4094         redist = PETSC_TRUE;
4095       } else {
4096         csin_ds = PETSC_FALSE;
4097         ncoarse_ds = all_procs;
4098       }
4099     }
4100   }
4101 
4102   /*
4103     test if we can go multilevel: three conditions must be satisfied:
4104     - we have not exceeded the number of levels requested
4105     - we can actually subassemble the active processes
4106     - we can find a suitable number of MPI processes where we can place the subassembled problem
4107   */
4108   multilevel_allowed = PETSC_FALSE;
4109   multilevel_requested = PETSC_FALSE;
4110   if (pcbddc->current_level < pcbddc->max_levels) {
4111     multilevel_requested = PETSC_TRUE;
4112     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4113       multilevel_allowed = PETSC_FALSE;
4114     } else {
4115       multilevel_allowed = PETSC_TRUE;
4116     }
4117   }
4118   /* determine number of process partecipating to coarse solver */
4119   if (multilevel_allowed) {
4120     ncoarse = ncoarse_ml;
4121     csin = csin_ml;
4122     redist = PETSC_FALSE;
4123   } else {
4124     ncoarse = ncoarse_ds;
4125     csin = csin_ds;
4126   }
4127 
4128   /* creates temporary l2gmap and IS for coarse indexes */
4129   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4130   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4131 
4132   /* creates temporary MATIS object for coarse matrix */
4133   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4134   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4135   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4136   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4137 #if 0
4138   {
4139     PetscViewer viewer;
4140     char filename[256];
4141     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4142     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4143     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4144     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4145     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4146   }
4147 #endif
4148   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4149   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4150   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4151   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4152   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4153 
4154   /* compute dofs splitting and neumann boundaries for coarse dofs */
4155   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4156     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4157     const PetscInt         *idxs;
4158     ISLocalToGlobalMapping tmap;
4159 
4160     /* create map between primal indices (in local representative ordering) and local primal numbering */
4161     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4162     /* allocate space for temporary storage */
4163     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4164     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4165     /* allocate for IS array */
4166     nisdofs = pcbddc->n_ISForDofsLocal;
4167     nisneu = !!pcbddc->NeumannBoundariesLocal;
4168     nis = nisdofs + nisneu;
4169     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4170     /* dofs splitting */
4171     for (i=0;i<nisdofs;i++) {
4172       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4173       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4174       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4175       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4176       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4177       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4178       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4179       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4180     }
4181     /* neumann boundaries */
4182     if (pcbddc->NeumannBoundariesLocal) {
4183       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4184       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4185       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4186       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4187       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4188       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4189       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4190       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4191     }
4192     /* free memory */
4193     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4194     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4195     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4196   } else {
4197     nis = 0;
4198     nisdofs = 0;
4199     nisneu = 0;
4200     isarray = NULL;
4201   }
4202   /* destroy no longer needed map */
4203   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4204 
4205   /* restrict on coarse candidates (if needed) */
4206   coarse_mat_is = NULL;
4207   if (csin) {
4208     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4209       if (redist) {
4210         PetscMPIInt rank;
4211         PetscInt    spc,n_spc_p1,dest[1],destsize;
4212 
4213         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4214         spc = active_procs/ncoarse;
4215         n_spc_p1 = active_procs%ncoarse;
4216         if (im_active) {
4217           destsize = 1;
4218           if (rank > n_spc_p1*(spc+1)-1) {
4219             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4220           } else {
4221             dest[0] = rank/(spc+1);
4222           }
4223         } else {
4224           destsize = 0;
4225         }
4226         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4227       } else if (csin_type_simple) {
4228         PetscMPIInt rank;
4229         PetscInt    issize,isidx;
4230 
4231         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4232         if (im_active) {
4233           issize = 1;
4234           isidx = (PetscInt)rank;
4235         } else {
4236           issize = 0;
4237           isidx = -1;
4238         }
4239         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4240       } else { /* get a suitable subassembling pattern from MATIS code */
4241         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4242       }
4243 
4244       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4245       if (!redist || ncoarse <= void_procs) {
4246         PetscInt ncoarse_cand,tissize,*nisindices;
4247         PetscInt *coarse_candidates;
4248         const PetscInt* tisindices;
4249 
4250         /* get coarse candidates' ranks in pc communicator */
4251         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4252         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4253         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4254           if (!coarse_candidates[i]) {
4255             coarse_candidates[ncoarse_cand++]=i;
4256           }
4257         }
4258         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4259 
4260 
4261         if (pcbddc->dbg_flag) {
4262           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4263           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4264           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4265           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4266           for (i=0;i<ncoarse_cand;i++) {
4267             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4268           }
4269           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4270           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4271         }
4272         /* shift the pattern on coarse candidates */
4273         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4274         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4275         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4276         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4277         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4278         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4279         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4280       }
4281       if (pcbddc->dbg_flag) {
4282         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4283         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4284         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4285         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4286       }
4287     }
4288     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4289     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
4290       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4291     } else { /* this is the last level, so use just receiving processes in subcomm */
4292       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4293     }
4294   } else {
4295     if (pcbddc->dbg_flag) {
4296       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4297       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4298       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4299     }
4300     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4301     coarse_mat_is = t_coarse_mat_is;
4302   }
4303 
4304   /* create local to global scatters for coarse problem */
4305   if (compute_vecs) {
4306     PetscInt lrows;
4307     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4308     if (coarse_mat_is) {
4309       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4310     } else {
4311       lrows = 0;
4312     }
4313     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4314     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4315     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4316     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4317     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4318   }
4319   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4320   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4321 
4322   /* set defaults for coarse KSP and PC */
4323   if (multilevel_allowed) {
4324     coarse_ksp_type = KSPRICHARDSON;
4325     coarse_pc_type = PCBDDC;
4326   } else {
4327     coarse_ksp_type = KSPPREONLY;
4328     coarse_pc_type = PCREDUNDANT;
4329   }
4330 
4331   /* print some info if requested */
4332   if (pcbddc->dbg_flag) {
4333     if (!multilevel_allowed) {
4334       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4335       if (multilevel_requested) {
4336         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4337       } else if (pcbddc->max_levels) {
4338         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4339       }
4340       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4341     }
4342   }
4343 
4344   /* create the coarse KSP object only once with defaults */
4345   if (coarse_mat_is) {
4346     MatReuse coarse_mat_reuse;
4347     PetscViewer dbg_viewer = NULL;
4348     if (pcbddc->dbg_flag) {
4349       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4350       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4351     }
4352     if (!pcbddc->coarse_ksp) {
4353       char prefix[256],str_level[16];
4354       size_t len;
4355       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4356       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4357       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4358       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4359       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4360       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4361       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4362       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4363       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4364       /* prefix */
4365       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4366       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4367       if (!pcbddc->current_level) {
4368         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4369         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4370       } else {
4371         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4372         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4373         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4374         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4375         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4376         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4377       }
4378       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4379       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4380       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4381       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4382       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4383       /* allow user customization */
4384       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4385     }
4386     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4387     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4388     if (nisdofs) {
4389       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4390       for (i=0;i<nisdofs;i++) {
4391         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4392       }
4393     }
4394     if (nisneu) {
4395       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4396       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4397     }
4398 
4399     /* get some info after set from options */
4400     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4401     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4402     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4403     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4404       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4405       isbddc = PETSC_FALSE;
4406     }
4407     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4408     if (isredundant) {
4409       KSP inner_ksp;
4410       PC  inner_pc;
4411       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4412       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4413       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4414     }
4415 
4416     /* assemble coarse matrix */
4417     if (coarse_reuse) {
4418       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4419       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4420       coarse_mat_reuse = MAT_REUSE_MATRIX;
4421     } else {
4422       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4423     }
4424     if (isbddc || isnn) {
4425       if (pcbddc->coarsening_ratio > 1) {
4426         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4427           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4428           if (pcbddc->dbg_flag) {
4429             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4430             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4431             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4432             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4433           }
4434         }
4435         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4436       } else {
4437         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4438         coarse_mat = coarse_mat_is;
4439       }
4440     } else {
4441       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4442     }
4443     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4444 
4445     /* propagate symmetry info of coarse matrix */
4446     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4447     if (pc->pmat->symmetric_set) {
4448       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
4449     }
4450     if (pc->pmat->hermitian_set) {
4451       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
4452     }
4453     if (pc->pmat->spd_set) {
4454       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
4455     }
4456     /* set operators */
4457     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4458     if (pcbddc->dbg_flag) {
4459       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4460     }
4461   } else { /* processes non partecipating to coarse solver (if any) */
4462     coarse_mat = 0;
4463   }
4464   ierr = PetscFree(isarray);CHKERRQ(ierr);
4465 #if 0
4466   {
4467     PetscViewer viewer;
4468     char filename[256];
4469     sprintf(filename,"coarse_mat.m");
4470     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4471     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4472     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4473     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4474   }
4475 #endif
4476 
4477   /* Compute coarse null space (special handling by BDDC only) */
4478 #if 0
4479   if (pcbddc->NullSpace) {
4480     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4481   }
4482 #endif
4483 
4484   if (pcbddc->coarse_ksp) {
4485     Vec crhs,csol;
4486     PetscBool ispreonly;
4487 
4488     if (CoarseNullSpace) {
4489       if (isbddc) {
4490         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4491       } else {
4492         ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr);
4493       }
4494     }
4495     /* setup coarse ksp */
4496     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4497     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4498     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4499     /* hack */
4500     if (!csol) {
4501       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4502     }
4503     if (!crhs) {
4504       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4505     }
4506     /* Check coarse problem if in debug mode or if solving with an iterative method */
4507     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4508     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4509       KSP       check_ksp;
4510       KSPType   check_ksp_type;
4511       PC        check_pc;
4512       Vec       check_vec,coarse_vec;
4513       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4514       PetscInt  its;
4515       PetscBool compute_eigs;
4516       PetscReal *eigs_r,*eigs_c;
4517       PetscInt  neigs;
4518       const char *prefix;
4519 
4520       /* Create ksp object suitable for estimation of extreme eigenvalues */
4521       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4522       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4523       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4524       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4525       if (ispreonly) {
4526         check_ksp_type = KSPPREONLY;
4527         compute_eigs = PETSC_FALSE;
4528       } else {
4529         check_ksp_type = KSPGMRES;
4530         compute_eigs = PETSC_TRUE;
4531       }
4532       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4533       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4534       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4535       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4536       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4537       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4538       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4539       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4540       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4541       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4542       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4543       /* create random vec */
4544       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4545       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4546       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4547       if (CoarseNullSpace) {
4548         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4549       }
4550       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4551       /* solve coarse problem */
4552       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4553       if (CoarseNullSpace) {
4554         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4555       }
4556       /* set eigenvalue estimation if preonly has not been requested */
4557       if (compute_eigs) {
4558         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4559         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4560         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4561         lambda_max = eigs_r[neigs-1];
4562         lambda_min = eigs_r[0];
4563         if (pcbddc->use_coarse_estimates) {
4564           if (lambda_max>lambda_min) {
4565             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4566             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4567           }
4568         }
4569       }
4570 
4571       /* check coarse problem residual error */
4572       if (pcbddc->dbg_flag) {
4573         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4574         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4575         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4576         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4577         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4578         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4579         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4580         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4581         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4582         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4583         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4584         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4585         if (compute_eigs) {
4586           PetscReal lambda_max_s,lambda_min_s;
4587           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4588           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4589           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4590           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4591           for (i=0;i<neigs;i++) {
4592             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4593           }
4594         }
4595         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4596         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4597       }
4598       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4599       if (compute_eigs) {
4600         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4601         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4602       }
4603     }
4604   }
4605   /* print additional info */
4606   if (pcbddc->dbg_flag) {
4607     /* waits until all processes reaches this point */
4608     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4609     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4610     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4611   }
4612 
4613   /* free memory */
4614   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4615   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4616   PetscFunctionReturn(0);
4617 }
4618 
4619 #undef __FUNCT__
4620 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4621 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4622 {
4623   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4624   PC_IS*         pcis = (PC_IS*)pc->data;
4625   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4626   IS             subset,subset_mult,subset_n;
4627   PetscInt       local_size,coarse_size=0;
4628   PetscInt       *local_primal_indices=NULL;
4629   const PetscInt *t_local_primal_indices;
4630   PetscErrorCode ierr;
4631 
4632   PetscFunctionBegin;
4633   /* Compute global number of coarse dofs */
4634   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
4635     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4636   }
4637   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
4638   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,subset_n,&subset);CHKERRQ(ierr);
4639   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
4640   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr);
4641   ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr);
4642   ierr = ISDestroy(&subset);CHKERRQ(ierr);
4643   ierr = ISDestroy(&subset_mult);CHKERRQ(ierr);
4644   ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr);
4645   if (local_size != pcbddc->local_primal_size) {
4646     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
4647   }
4648   ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr);
4649   ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
4650   ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr);
4651   ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
4652   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
4653 
4654   /* check numbering */
4655   if (pcbddc->dbg_flag) {
4656     PetscScalar coarsesum,*array;
4657     PetscInt    i;
4658     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4659 
4660     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4661     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4662     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4663     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4664     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4665     for (i=0;i<pcbddc->local_primal_size;i++) {
4666       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4667     }
4668     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4669     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4670     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4671     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4672     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4673     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4674     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4675     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4676     for (i=0;i<pcis->n;i++) {
4677       if (array[i] == 1.0) {
4678         set_error = PETSC_TRUE;
4679         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4680       }
4681     }
4682     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4683     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4684     for (i=0;i<pcis->n;i++) {
4685       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4686     }
4687     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4688     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4689     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4690     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4691     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4692     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4693     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4694       PetscInt *gidxs;
4695 
4696       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4697       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4698       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4699       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4700       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4701       for (i=0;i<pcbddc->local_primal_size;i++) {
4702         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4703       }
4704       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4705       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4706     }
4707     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4708     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4709   }
4710   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4711   /* get back data */
4712   *coarse_size_n = coarse_size;
4713   *local_primal_indices_n = local_primal_indices;
4714   PetscFunctionReturn(0);
4715 }
4716 
4717 #undef __FUNCT__
4718 #define __FUNCT__ "PCBDDCGlobalToLocal"
4719 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4720 {
4721   IS             localis_t;
4722   PetscInt       i,lsize,*idxs,n;
4723   PetscScalar    *vals;
4724   PetscErrorCode ierr;
4725 
4726   PetscFunctionBegin;
4727   /* get indices in local ordering exploiting local to global map */
4728   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4729   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4730   for (i=0;i<lsize;i++) vals[i] = 1.0;
4731   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4732   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4733   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4734   if (idxs) { /* multilevel guard */
4735     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4736   }
4737   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4738   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4739   ierr = PetscFree(vals);CHKERRQ(ierr);
4740   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4741   /* now compute set in local ordering */
4742   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4743   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4744   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4745   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4746   for (i=0,lsize=0;i<n;i++) {
4747     if (PetscRealPart(vals[i]) > 0.5) {
4748       lsize++;
4749     }
4750   }
4751   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4752   for (i=0,lsize=0;i<n;i++) {
4753     if (PetscRealPart(vals[i]) > 0.5) {
4754       idxs[lsize++] = i;
4755     }
4756   }
4757   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4758   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4759   *localis = localis_t;
4760   PetscFunctionReturn(0);
4761 }
4762 
4763 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4764 #undef __FUNCT__
4765 #define __FUNCT__ "PCBDDCMatMult_Private"
4766 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4767 {
4768   PCBDDCChange_ctx change_ctx;
4769   PetscErrorCode   ierr;
4770 
4771   PetscFunctionBegin;
4772   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4773   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4774   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4775   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4776   PetscFunctionReturn(0);
4777 }
4778 
4779 #undef __FUNCT__
4780 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4781 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4782 {
4783   PCBDDCChange_ctx change_ctx;
4784   PetscErrorCode   ierr;
4785 
4786   PetscFunctionBegin;
4787   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4788   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4789   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4790   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4791   PetscFunctionReturn(0);
4792 }
4793 
4794 #undef __FUNCT__
4795 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4796 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4797 {
4798   PC_IS               *pcis=(PC_IS*)pc->data;
4799   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4800   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4801   Mat                 S_j;
4802   PetscInt            *used_xadj,*used_adjncy;
4803   PetscBool           free_used_adj;
4804   PetscErrorCode      ierr;
4805 
4806   PetscFunctionBegin;
4807   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4808   free_used_adj = PETSC_FALSE;
4809   if (pcbddc->sub_schurs_layers == -1) {
4810     used_xadj = NULL;
4811     used_adjncy = NULL;
4812   } else {
4813     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4814       used_xadj = pcbddc->mat_graph->xadj;
4815       used_adjncy = pcbddc->mat_graph->adjncy;
4816     } else if (pcbddc->computed_rowadj) {
4817       used_xadj = pcbddc->mat_graph->xadj;
4818       used_adjncy = pcbddc->mat_graph->adjncy;
4819     } else {
4820       PetscBool      flg_row=PETSC_FALSE;
4821       const PetscInt *xadj,*adjncy;
4822       PetscInt       nvtxs;
4823 
4824       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4825       if (flg_row) {
4826         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4827         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4828         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4829         free_used_adj = PETSC_TRUE;
4830       } else {
4831         pcbddc->sub_schurs_layers = -1;
4832         used_xadj = NULL;
4833         used_adjncy = NULL;
4834       }
4835       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4836     }
4837   }
4838 
4839   /* setup sub_schurs data */
4840   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4841   if (!sub_schurs->use_mumps) {
4842     /* pcbddc->ksp_D up to date only if not using MUMPS */
4843     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4844     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr);
4845   } else {
4846     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
4847     PetscBool isseqaij;
4848     if (!pcbddc->use_vertices && reuse_solvers) {
4849       PetscInt n_vertices;
4850 
4851       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
4852       reuse_solvers = (PetscBool)!n_vertices;
4853     }
4854     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
4855     if (!isseqaij) {
4856       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
4857       if (matis->A == pcbddc->local_mat) {
4858         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
4859         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
4860       } else {
4861         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
4862       }
4863     }
4864     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr);
4865   }
4866   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4867 
4868   /* free adjacency */
4869   if (free_used_adj) {
4870     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4871   }
4872   PetscFunctionReturn(0);
4873 }
4874 
4875 #undef __FUNCT__
4876 #define __FUNCT__ "PCBDDCInitSubSchurs"
4877 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4878 {
4879   PC_IS               *pcis=(PC_IS*)pc->data;
4880   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4881   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4882   PCBDDCGraph         graph;
4883   PetscErrorCode      ierr;
4884 
4885   PetscFunctionBegin;
4886   /* attach interface graph for determining subsets */
4887   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4888     IS       verticesIS,verticescomm;
4889     PetscInt vsize,*idxs;
4890 
4891     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4892     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
4893     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
4894     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
4895     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
4896     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4897     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4898     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4899     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
4900     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
4901     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4902 /*
4903     if (pcbddc->dbg_flag) {
4904       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4905     }
4906 */
4907   } else {
4908     graph = pcbddc->mat_graph;
4909   }
4910 
4911   /* sub_schurs init */
4912   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4913 
4914   /* free graph struct */
4915   if (pcbddc->sub_schurs_rebuild) {
4916     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4917   }
4918   PetscFunctionReturn(0);
4919 }
4920