1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 29 30 if (pcbddc->dbg_flag) { 31 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 32 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 33 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 35 } 36 37 if (pcbddc->dbg_flag) { 38 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 39 } 40 41 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 42 43 /* max size of subsets */ 44 mss = 0; 45 for (i=0;i<sub_schurs->n_subs;i++) { 46 PetscInt subset_size; 47 48 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 49 mss = PetscMax(mss,subset_size); 50 } 51 52 /* min/max and threshold */ 53 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 54 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 55 nmax = PetscMax(nmin,nmax); 56 allocated_S_St = PETSC_FALSE; 57 if (nmin) { 58 allocated_S_St = PETSC_TRUE; 59 } 60 61 /* allocate lapack workspace */ 62 cum = cum2 = 0; 63 maxneigs = 0; 64 for (i=0;i<sub_schurs->n_subs;i++) { 65 PetscInt n,subset_size; 66 67 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 68 n = PetscMin(subset_size,nmax); 69 cum += subset_size; 70 cum2 += subset_size*n; 71 maxneigs = PetscMax(maxneigs,n); 72 } 73 if (mss) { 74 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 75 PetscBLASInt B_itype = 1; 76 PetscBLASInt B_N = mss; 77 PetscReal zero = 0.0; 78 PetscReal eps = 0.0; /* dlamch? */ 79 80 B_lwork = -1; 81 S = NULL; 82 St = NULL; 83 eigs = NULL; 84 eigv = NULL; 85 B_iwork = NULL; 86 B_ifail = NULL; 87 #if defined(PETSC_USE_COMPLEX) 88 rwork = NULL; 89 #endif 90 thresh = 1.0; 91 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 92 #if defined(PETSC_USE_COMPLEX) 93 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 94 #else 95 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 96 #endif 97 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 98 ierr = PetscFPTrapPop();CHKERRQ(ierr); 99 } else { 100 /* TODO */ 101 } 102 } else { 103 lwork = 0; 104 } 105 106 nv = 0; 107 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 108 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 109 } 110 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 111 if (allocated_S_St) { 112 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 113 } 114 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 115 #if defined(PETSC_USE_COMPLEX) 116 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 117 #endif 118 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 119 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 120 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 121 nv+cum,&pcbddc->adaptive_constraints_idxs, 122 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 123 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 124 125 maxneigs = 0; 126 cum = cumarray = 0; 127 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 128 pcbddc->adaptive_constraints_data_ptr[0] = 0; 129 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 130 const PetscInt *idxs; 131 132 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 133 for (cum=0;cum<nv;cum++) { 134 pcbddc->adaptive_constraints_n[cum] = 1; 135 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 136 pcbddc->adaptive_constraints_data[cum] = 1.0; 137 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 138 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 139 } 140 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 141 } 142 143 if (mss) { /* multilevel */ 144 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 145 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 146 } 147 148 for (i=0;i<sub_schurs->n_subs;i++) { 149 150 const PetscInt *idxs; 151 PetscReal infty = PETSC_MAX_REAL; 152 PetscInt j,subset_size,eigs_start = 0; 153 PetscBLASInt B_N; 154 PetscBool same_data = PETSC_FALSE; 155 156 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 157 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 158 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 159 if (sub_schurs->is_hermitian) { 160 PetscInt j,k; 161 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 162 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 163 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 164 } 165 for (j=0;j<subset_size;j++) { 166 for (k=j;k<subset_size;k++) { 167 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 168 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 169 } 170 } 171 } else { 172 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 173 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 174 } 175 } else { 176 S = Sarray + cumarray; 177 St = Starray + cumarray; 178 } 179 180 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 181 /* see if we can save some work */ 182 if (sub_schurs->n_subs == 1) { 183 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 184 } 185 186 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 187 B_neigs = 0; 188 } else { 189 /* Threshold: this is an heuristic for edges */ 190 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 191 192 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 193 PetscBLASInt B_itype = 1; 194 PetscBLASInt B_IL, B_IU; 195 PetscReal eps = -1.0; /* dlamch? */ 196 PetscInt nmin_s; 197 198 /* ask for eigenvalues larger than thresh */ 199 if (pcbddc->dbg_flag) { 200 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 201 } 202 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 203 #if defined(PETSC_USE_COMPLEX) 204 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 205 #else 206 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 207 #endif 208 ierr = PetscFPTrapPop();CHKERRQ(ierr); 209 if (B_ierr) { 210 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 211 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 212 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 213 } 214 215 if (B_neigs > nmax) { 216 if (pcbddc->dbg_flag) { 217 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 218 } 219 eigs_start = B_neigs -nmax; 220 B_neigs = nmax; 221 } 222 223 nmin_s = PetscMin(nmin,B_N); 224 if (B_neigs < nmin_s) { 225 PetscBLASInt B_neigs2; 226 227 B_IU = B_N - B_neigs; 228 B_IL = B_N - nmin_s + 1; 229 if (pcbddc->dbg_flag) { 230 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 231 } 232 if (sub_schurs->is_hermitian) { 233 PetscInt j; 234 for (j=0;j<subset_size;j++) { 235 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 236 } 237 for (j=0;j<subset_size;j++) { 238 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 239 } 240 } else { 241 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 242 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 243 } 244 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 245 #if defined(PETSC_USE_COMPLEX) 246 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 247 #else 248 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 249 #endif 250 ierr = PetscFPTrapPop();CHKERRQ(ierr); 251 B_neigs += B_neigs2; 252 } 253 if (B_ierr) { 254 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 255 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 256 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 257 } 258 if (pcbddc->dbg_flag) { 259 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 260 for (j=0;j<B_neigs;j++) { 261 if (eigs[j] == 0.0) { 262 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 263 } else { 264 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 265 } 266 } 267 } 268 } else { 269 /* TODO */ 270 } 271 } 272 maxneigs = PetscMax(B_neigs,maxneigs); 273 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 274 if (B_neigs) { 275 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 276 277 if (pcbddc->dbg_flag > 1) { 278 PetscInt ii; 279 for (ii=0;ii<B_neigs;ii++) { 280 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 281 for (j=0;j<B_N;j++) { 282 #if defined(PETSC_USE_COMPLEX) 283 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 284 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 285 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 286 #else 287 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 288 #endif 289 } 290 } 291 } 292 #if 0 293 for (j=0;j<B_neigs;j++) { 294 PetscBLASInt Blas_N,Blas_one = 1.0; 295 PetscScalar norm; 296 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 297 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 298 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 299 if (pcbddc->adaptive_constraints_data[cum] > 0.0) { 300 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 301 } else { 302 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 303 } 304 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 305 } 306 #endif 307 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 308 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 309 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 310 cum++; 311 } 312 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 313 /* shift for next computation */ 314 cumarray += subset_size*subset_size; 315 } 316 if (pcbddc->dbg_flag) { 317 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 318 } 319 320 if (mss) { 321 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 322 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 323 /* destroy matrices (junk) */ 324 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 325 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 326 } 327 if (allocated_S_St) { 328 ierr = PetscFree2(S,St);CHKERRQ(ierr); 329 } 330 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 331 #if defined(PETSC_USE_COMPLEX) 332 ierr = PetscFree(rwork);CHKERRQ(ierr); 333 #endif 334 if (pcbddc->dbg_flag) { 335 PetscInt maxneigs_r; 336 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 337 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 338 } 339 PetscFunctionReturn(0); 340 } 341 342 #undef __FUNCT__ 343 #define __FUNCT__ "PCBDDCSetUpSolvers" 344 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 345 { 346 PetscScalar *coarse_submat_vals; 347 PetscErrorCode ierr; 348 349 PetscFunctionBegin; 350 /* Setup local scatters R_to_B and (optionally) R_to_D */ 351 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 352 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 353 354 /* Setup local neumann solver ksp_R */ 355 /* PCBDDCSetUpLocalScatters should be called first! */ 356 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 357 358 /* 359 Setup local correction and local part of coarse basis. 360 Gives back the dense local part of the coarse matrix in column major ordering 361 */ 362 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 363 364 /* Compute total number of coarse nodes and setup coarse solver */ 365 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 366 367 /* free */ 368 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 369 PetscFunctionReturn(0); 370 } 371 372 #undef __FUNCT__ 373 #define __FUNCT__ "PCBDDCResetCustomization" 374 PetscErrorCode PCBDDCResetCustomization(PC pc) 375 { 376 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 377 PetscErrorCode ierr; 378 379 PetscFunctionBegin; 380 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 381 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 382 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 383 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 384 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 385 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 386 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 387 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 388 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 389 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 390 PetscFunctionReturn(0); 391 } 392 393 #undef __FUNCT__ 394 #define __FUNCT__ "PCBDDCResetTopography" 395 PetscErrorCode PCBDDCResetTopography(PC pc) 396 { 397 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 398 PetscErrorCode ierr; 399 400 PetscFunctionBegin; 401 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 402 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 403 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 404 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 405 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "PCBDDCResetSolvers" 411 PetscErrorCode PCBDDCResetSolvers(PC pc) 412 { 413 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 414 PetscScalar *array; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 419 if (pcbddc->coarse_phi_B) { 420 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 421 ierr = PetscFree(array);CHKERRQ(ierr); 422 } 423 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 424 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 425 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 426 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 427 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 428 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 429 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 430 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 431 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 432 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 433 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 434 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 435 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 436 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 437 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 438 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 439 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 440 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 441 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 442 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 443 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 444 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 445 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 446 PetscFunctionReturn(0); 447 } 448 449 #undef __FUNCT__ 450 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 451 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 452 { 453 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 454 PC_IS *pcis = (PC_IS*)pc->data; 455 VecType impVecType; 456 PetscInt n_constraints,n_R,old_size; 457 PetscErrorCode ierr; 458 459 PetscFunctionBegin; 460 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 461 /* get sizes */ 462 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 463 n_R = pcis->n-pcbddc->n_vertices; 464 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 465 /* local work vectors (try to avoid unneeded work)*/ 466 /* R nodes */ 467 old_size = -1; 468 if (pcbddc->vec1_R) { 469 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 470 } 471 if (n_R != old_size) { 472 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 473 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 474 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 475 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 476 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 477 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 478 } 479 /* local primal dofs */ 480 old_size = -1; 481 if (pcbddc->vec1_P) { 482 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 483 } 484 if (pcbddc->local_primal_size != old_size) { 485 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 486 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 487 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 488 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 489 } 490 /* local explicit constraints */ 491 old_size = -1; 492 if (pcbddc->vec1_C) { 493 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 494 } 495 if (n_constraints && n_constraints != old_size) { 496 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 497 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 498 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 499 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 500 } 501 PetscFunctionReturn(0); 502 } 503 504 #undef __FUNCT__ 505 #define __FUNCT__ "PCBDDCSetUpCorrection" 506 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 507 { 508 PetscErrorCode ierr; 509 /* pointers to pcis and pcbddc */ 510 PC_IS* pcis = (PC_IS*)pc->data; 511 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 512 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 513 /* submatrices of local problem */ 514 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 515 /* submatrices of local coarse problem */ 516 Mat S_VV,S_CV,S_VC,S_CC; 517 /* working matrices */ 518 Mat C_CR; 519 /* additional working stuff */ 520 PC pc_R; 521 Mat F; 522 PetscBool isLU,isCHOL,isILU; 523 524 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 525 PetscScalar *work; 526 PetscInt *idx_V_B; 527 PetscInt n,n_vertices,n_constraints; 528 PetscInt i,n_R,n_D,n_B; 529 PetscBool unsymmetric_check; 530 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 531 MatType impMatType; 532 /* some shortcuts to scalars */ 533 PetscScalar one=1.0,m_one=-1.0; 534 535 PetscFunctionBegin; 536 n_vertices = pcbddc->n_vertices; 537 n_constraints = pcbddc->local_primal_size-n_vertices; 538 /* Set Non-overlapping dimensions */ 539 n_B = pcis->n_B; 540 n_D = pcis->n - n_B; 541 n_R = pcis->n - n_vertices; 542 543 /* Set types for local objects needed by BDDC precondtioner */ 544 impMatType = MATSEQDENSE; 545 546 /* vertices in boundary numbering */ 547 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 548 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 549 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 550 551 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 552 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 553 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 554 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 555 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 556 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 557 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 558 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 559 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 560 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 561 562 unsymmetric_check = PETSC_FALSE; 563 /* allocate workspace */ 564 n = 0; 565 if (n_constraints) { 566 n += n_R*n_constraints; 567 } 568 if (n_vertices) { 569 n = PetscMax(2*n_R*n_vertices,n); 570 n = PetscMax((n_R+n_B)*n_vertices,n); 571 } 572 if (!pcbddc->symmetric_primal) { 573 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 574 unsymmetric_check = PETSC_TRUE; 575 } 576 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 577 578 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 579 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 580 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 581 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 582 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 583 if (isLU || isILU || isCHOL) { 584 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 585 } else if (sub_schurs->reuse_mumps) { 586 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 587 MatFactorType type; 588 589 F = reuse_mumps->F; 590 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 591 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 592 } else { 593 F = NULL; 594 } 595 596 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 597 if (n_constraints) { 598 Mat M1,M2,M3; 599 Mat auxmat; 600 IS is_aux; 601 PetscScalar *array,*array2; 602 603 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 604 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 605 606 /* Extract constraints on R nodes: C_{CR} */ 607 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 608 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 609 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 610 611 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 612 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 613 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 614 for (i=0;i<n_constraints;i++) { 615 const PetscScalar *row_cmat_values; 616 const PetscInt *row_cmat_indices; 617 PetscInt size_of_constraint,j; 618 619 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 620 for (j=0;j<size_of_constraint;j++) { 621 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 622 } 623 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 624 } 625 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 626 if (F) { 627 Mat B; 628 629 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 630 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 631 ierr = MatDestroy(&B);CHKERRQ(ierr); 632 } else { 633 PetscScalar *marr; 634 635 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 636 for (i=0;i<n_constraints;i++) { 637 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 638 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 639 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 640 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 641 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 642 } 643 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 644 } 645 if (!pcbddc->switch_static) { 646 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 647 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 648 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 649 for (i=0;i<n_constraints;i++) { 650 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 651 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 652 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 653 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 654 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 655 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 656 } 657 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 658 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 659 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 660 } else { 661 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 662 pcbddc->local_auxmat2 = local_auxmat2_R; 663 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 664 } 665 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 666 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 667 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 668 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 669 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 670 if (isCHOL) { 671 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 672 } else { 673 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 674 } 675 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 676 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 677 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 678 ierr = MatDestroy(&M2);CHKERRQ(ierr); 679 ierr = MatDestroy(&M3);CHKERRQ(ierr); 680 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 681 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 682 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 683 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 684 ierr = MatDestroy(&M1);CHKERRQ(ierr); 685 } 686 /* Get submatrices from subdomain matrix */ 687 if (n_vertices) { 688 IS is_aux; 689 690 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 691 IS tis; 692 693 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 694 ierr = ISSort(tis);CHKERRQ(ierr); 695 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 696 ierr = ISDestroy(&tis);CHKERRQ(ierr); 697 } else { 698 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 699 } 700 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 701 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 702 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 703 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 704 } 705 706 /* Matrix of coarse basis functions (local) */ 707 if (pcbddc->coarse_phi_B) { 708 PetscInt on_B,on_primal,on_D=n_D; 709 if (pcbddc->coarse_phi_D) { 710 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 711 } 712 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 713 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 714 PetscScalar *marray; 715 716 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 717 ierr = PetscFree(marray);CHKERRQ(ierr); 718 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 719 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 720 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 721 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 722 } 723 } 724 725 if (!pcbddc->coarse_phi_B) { 726 PetscScalar *marray; 727 728 n = n_B*pcbddc->local_primal_size; 729 if (pcbddc->switch_static || pcbddc->dbg_flag) { 730 n += n_D*pcbddc->local_primal_size; 731 } 732 if (!pcbddc->symmetric_primal) { 733 n *= 2; 734 } 735 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 736 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 737 n = n_B*pcbddc->local_primal_size; 738 if (pcbddc->switch_static || pcbddc->dbg_flag) { 739 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 740 n += n_D*pcbddc->local_primal_size; 741 } 742 if (!pcbddc->symmetric_primal) { 743 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 744 if (pcbddc->switch_static || pcbddc->dbg_flag) { 745 n = n_B*pcbddc->local_primal_size; 746 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 747 } 748 } else { 749 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 750 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 751 if (pcbddc->switch_static || pcbddc->dbg_flag) { 752 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 753 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 754 } 755 } 756 } 757 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 758 /* vertices */ 759 if (n_vertices) { 760 761 ierr = MatConvert(A_VV,impMatType,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 762 763 if (n_R) { 764 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 765 PetscBLASInt B_N,B_one = 1; 766 PetscScalar *x,*y; 767 PetscBool isseqaij; 768 769 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 770 ierr = MatConvert(A_RV,impMatType,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 771 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 772 if (F) { /* TODO could be optimized for symmetric problems */ 773 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 774 } else { 775 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 776 for (i=0;i<n_vertices;i++) { 777 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 778 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 779 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 780 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 781 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 782 } 783 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 784 } 785 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 786 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 787 if (n_constraints) { 788 Mat B; 789 790 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 791 for (i=0;i<n_vertices;i++) { 792 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 793 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 794 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 795 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 796 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 797 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 798 } 799 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 800 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 801 ierr = MatDestroy(&B);CHKERRQ(ierr); 802 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 803 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 804 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 805 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 806 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 807 ierr = MatDestroy(&B);CHKERRQ(ierr); 808 } 809 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 810 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 811 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 812 } 813 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 814 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 815 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 816 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 817 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 818 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 819 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 820 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 821 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 822 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 823 } else { 824 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 825 } 826 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 827 /* coarse basis functions */ 828 for (i=0;i<n_vertices;i++) { 829 PetscScalar *y; 830 831 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 832 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 833 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 834 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 835 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 836 y[n_B*i+idx_V_B[i]] = 1.0; 837 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 838 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 839 840 if (pcbddc->switch_static || pcbddc->dbg_flag) { 841 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 842 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 843 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 844 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 845 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 846 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 847 } 848 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 849 } 850 /* if n_R == 0 the object is not destroyed */ 851 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 852 } 853 854 if (n_constraints) { 855 Mat B; 856 857 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 858 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 859 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 860 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 861 if (n_vertices) { 862 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 863 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 864 } else { 865 Mat S_VCt; 866 867 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 868 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 869 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 870 } 871 } 872 ierr = MatDestroy(&B);CHKERRQ(ierr); 873 /* coarse basis functions */ 874 for (i=0;i<n_constraints;i++) { 875 PetscScalar *y; 876 877 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 878 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 879 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 880 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 882 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 883 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 884 if (pcbddc->switch_static || pcbddc->dbg_flag) { 885 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 886 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 887 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 888 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 890 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 891 } 892 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 893 } 894 } 895 if (n_constraints) { 896 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 897 } 898 899 /* compute other basis functions for non-symmetric problems */ 900 if (!pcbddc->symmetric_primal) { 901 902 if (n_constraints) { 903 Mat S_CCT,B_C; 904 905 /* this is a lazy thing */ 906 ierr = MatConvert(C_CR,impMatType,MAT_INPLACE_MATRIX,&C_CR);CHKERRQ(ierr); 907 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 908 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 909 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 910 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 911 if (n_vertices) { 912 Mat B_V,S_VCT; 913 914 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 915 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 916 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 917 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 918 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 919 } 920 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 921 } else { /* if there are no constraints, reset work */ 922 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 923 } 924 if (n_vertices && n_R) { 925 Mat A_VRT; 926 PetscScalar *marray; 927 PetscBLASInt B_N,B_one = 1; 928 929 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 930 ierr = MatConvert(A_VRT,impMatType,MAT_INPLACE_MATRIX,&A_VRT);CHKERRQ(ierr); 931 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 932 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 933 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 934 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 935 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 936 } 937 938 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 939 for (i=0;i<pcbddc->local_primal_size;i++) { 940 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 941 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 942 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 943 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 944 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 945 } 946 } else { 947 for (i=0;i<pcbddc->local_primal_size;i++) { 948 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 949 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 950 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 951 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 952 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 953 } 954 } 955 /* coarse basis functions */ 956 for (i=0;i<pcbddc->local_primal_size;i++) { 957 PetscScalar *y; 958 959 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 960 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 961 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 962 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 964 if (i<n_vertices) { 965 y[n_B*i+idx_V_B[i]] = 1.0; 966 } 967 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 968 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 969 970 if (pcbddc->switch_static || pcbddc->dbg_flag) { 971 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 972 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 973 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 974 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 975 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 976 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 977 } 978 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 979 } 980 } 981 /* free memory */ 982 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 983 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 984 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 985 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 986 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 987 ierr = PetscFree(work);CHKERRQ(ierr); 988 if (n_vertices) { 989 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 990 } 991 if (n_constraints) { 992 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 993 } 994 /* Checking coarse_sub_mat and coarse basis functios */ 995 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 996 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 997 if (pcbddc->dbg_flag) { 998 Mat coarse_sub_mat; 999 Mat AUXMAT,TM1,TM2,TM3,TM4; 1000 Mat coarse_phi_D,coarse_phi_B; 1001 Mat coarse_psi_D,coarse_psi_B; 1002 Mat A_II,A_BB,A_IB,A_BI; 1003 Mat C_B,CPHI; 1004 IS is_dummy; 1005 Vec mones; 1006 MatType checkmattype=MATSEQAIJ; 1007 PetscReal real_value; 1008 1009 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1010 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1011 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1012 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1013 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1014 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1015 if (unsymmetric_check) { 1016 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1017 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1018 } 1019 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1020 1021 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1022 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1023 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1024 if (unsymmetric_check) { 1025 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1026 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1027 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1028 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1029 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1030 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1031 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1032 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1033 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1034 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1035 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1036 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1037 } else { 1038 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1039 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1040 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1041 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1042 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1043 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1044 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1045 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1046 } 1047 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1048 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1049 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1050 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 1051 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1052 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1053 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1054 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1055 1056 /* check constraints */ 1057 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1058 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1059 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1060 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1061 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1062 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1063 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1064 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1065 if (unsymmetric_check) { 1066 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1067 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1068 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1069 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1070 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1071 } 1072 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1073 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1074 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1075 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1076 1077 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1078 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1079 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1080 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1081 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1082 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1083 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1084 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1085 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1086 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1087 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1088 if (unsymmetric_check) { 1089 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1090 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1091 } 1092 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1093 } 1094 /* get back data */ 1095 *coarse_submat_vals_n = coarse_submat_vals; 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1101 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1102 { 1103 Mat *work_mat; 1104 IS isrow_s,iscol_s; 1105 PetscBool rsorted,csorted; 1106 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 1107 PetscErrorCode ierr; 1108 1109 PetscFunctionBegin; 1110 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1111 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1112 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1113 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1114 1115 if (!rsorted) { 1116 const PetscInt *idxs; 1117 PetscInt *idxs_sorted,i; 1118 1119 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1120 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1121 for (i=0;i<rsize;i++) { 1122 idxs_perm_r[i] = i; 1123 } 1124 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1125 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1126 for (i=0;i<rsize;i++) { 1127 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1128 } 1129 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1130 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1131 } else { 1132 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1133 isrow_s = isrow; 1134 } 1135 1136 if (!csorted) { 1137 if (isrow == iscol) { 1138 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1139 iscol_s = isrow_s; 1140 } else { 1141 const PetscInt *idxs; 1142 PetscInt *idxs_sorted,i; 1143 1144 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1145 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1146 for (i=0;i<csize;i++) { 1147 idxs_perm_c[i] = i; 1148 } 1149 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1150 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1151 for (i=0;i<csize;i++) { 1152 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1153 } 1154 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1155 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1156 } 1157 } else { 1158 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1159 iscol_s = iscol; 1160 } 1161 1162 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1163 1164 if (!rsorted || !csorted) { 1165 Mat new_mat; 1166 IS is_perm_r,is_perm_c; 1167 1168 if (!rsorted) { 1169 PetscInt *idxs_r,i; 1170 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1171 for (i=0;i<rsize;i++) { 1172 idxs_r[idxs_perm_r[i]] = i; 1173 } 1174 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1175 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1176 } else { 1177 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1178 } 1179 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1180 1181 if (!csorted) { 1182 if (isrow_s == iscol_s) { 1183 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1184 is_perm_c = is_perm_r; 1185 } else { 1186 PetscInt *idxs_c,i; 1187 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1188 for (i=0;i<csize;i++) { 1189 idxs_c[idxs_perm_c[i]] = i; 1190 } 1191 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1192 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1193 } 1194 } else { 1195 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1196 } 1197 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1198 1199 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1200 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1201 work_mat[0] = new_mat; 1202 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1203 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1204 } 1205 1206 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1207 *B = work_mat[0]; 1208 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1209 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1210 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1211 PetscFunctionReturn(0); 1212 } 1213 1214 #undef __FUNCT__ 1215 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1216 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1217 { 1218 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1219 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1220 Mat new_mat; 1221 IS is_local,is_global; 1222 PetscInt local_size; 1223 PetscBool isseqaij; 1224 PetscErrorCode ierr; 1225 1226 PetscFunctionBegin; 1227 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1228 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1229 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1230 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1231 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1232 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1233 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1234 1235 /* check */ 1236 if (pcbddc->dbg_flag) { 1237 Vec x,x_change; 1238 PetscReal error; 1239 1240 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1241 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1242 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1243 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1244 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1246 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1247 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1248 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1249 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1250 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1251 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1252 ierr = VecDestroy(&x);CHKERRQ(ierr); 1253 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1254 } 1255 1256 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1257 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1258 if (isseqaij) { 1259 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1260 } else { 1261 Mat work_mat; 1262 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1263 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1264 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1265 } 1266 if (matis->A->symmetric_set) { 1267 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1268 #if !defined(PETSC_USE_COMPLEX) 1269 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1270 #endif 1271 } 1272 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1278 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1279 { 1280 PC_IS* pcis = (PC_IS*)(pc->data); 1281 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1282 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1283 PetscInt *idx_R_local=NULL; 1284 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1285 PetscInt vbs,bs; 1286 PetscBT bitmask=NULL; 1287 PetscErrorCode ierr; 1288 1289 PetscFunctionBegin; 1290 /* 1291 No need to setup local scatters if 1292 - primal space is unchanged 1293 AND 1294 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1295 AND 1296 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1297 */ 1298 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1299 PetscFunctionReturn(0); 1300 } 1301 /* destroy old objects */ 1302 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1303 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1304 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1305 /* Set Non-overlapping dimensions */ 1306 n_B = pcis->n_B; 1307 n_D = pcis->n - n_B; 1308 n_vertices = pcbddc->n_vertices; 1309 1310 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1311 1312 /* create auxiliary bitmask and allocate workspace */ 1313 if (!sub_schurs->reuse_mumps) { 1314 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1315 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1316 for (i=0;i<n_vertices;i++) { 1317 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1318 } 1319 1320 for (i=0, n_R=0; i<pcis->n; i++) { 1321 if (!PetscBTLookup(bitmask,i)) { 1322 idx_R_local[n_R++] = i; 1323 } 1324 } 1325 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1326 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1327 1328 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1329 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1330 } 1331 1332 /* Block code */ 1333 vbs = 1; 1334 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1335 if (bs>1 && !(n_vertices%bs)) { 1336 PetscBool is_blocked = PETSC_TRUE; 1337 PetscInt *vary; 1338 if (!sub_schurs->reuse_mumps) { 1339 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1340 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1341 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1342 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1343 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1344 for (i=0; i<pcis->n/bs; i++) { 1345 if (vary[i]!=0 && vary[i]!=bs) { 1346 is_blocked = PETSC_FALSE; 1347 break; 1348 } 1349 } 1350 ierr = PetscFree(vary);CHKERRQ(ierr); 1351 } else { 1352 /* Verify directly the R set */ 1353 for (i=0; i<n_R/bs; i++) { 1354 PetscInt j,node=idx_R_local[bs*i]; 1355 for (j=1; j<bs; j++) { 1356 if (node != idx_R_local[bs*i+j]-j) { 1357 is_blocked = PETSC_FALSE; 1358 break; 1359 } 1360 } 1361 } 1362 } 1363 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1364 vbs = bs; 1365 for (i=0;i<n_R/vbs;i++) { 1366 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1367 } 1368 } 1369 } 1370 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1371 if (sub_schurs->reuse_mumps) { 1372 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1373 1374 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1375 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1376 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1377 reuse_mumps->is_R = pcbddc->is_R_local; 1378 } else { 1379 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1380 } 1381 1382 /* print some info if requested */ 1383 if (pcbddc->dbg_flag) { 1384 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1385 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1386 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1387 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1388 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1389 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1390 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1391 } 1392 1393 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1394 if (!sub_schurs->reuse_mumps) { 1395 IS is_aux1,is_aux2; 1396 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1397 1398 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1399 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1400 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1401 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1402 for (i=0; i<n_D; i++) { 1403 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1404 } 1405 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1406 for (i=0, j=0; i<n_R; i++) { 1407 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1408 aux_array1[j++] = i; 1409 } 1410 } 1411 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1412 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1413 for (i=0, j=0; i<n_B; i++) { 1414 if (!PetscBTLookup(bitmask,is_indices[i])) { 1415 aux_array2[j++] = i; 1416 } 1417 } 1418 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1419 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1420 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1421 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1422 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1423 1424 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1425 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1426 for (i=0, j=0; i<n_R; i++) { 1427 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1428 aux_array1[j++] = i; 1429 } 1430 } 1431 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1432 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1433 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1434 } 1435 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1436 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1437 } else { 1438 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1439 IS tis; 1440 PetscInt schur_size; 1441 1442 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1443 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1444 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1445 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1446 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1447 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1448 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1449 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1450 } 1451 } 1452 PetscFunctionReturn(0); 1453 } 1454 1455 1456 #undef __FUNCT__ 1457 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1458 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1459 { 1460 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1461 PC_IS *pcis = (PC_IS*)pc->data; 1462 PC pc_temp; 1463 Mat A_RR; 1464 MatReuse reuse; 1465 PetscScalar m_one = -1.0; 1466 PetscReal value; 1467 PetscInt n_D,n_R; 1468 PetscBool check_corr[2],issbaij; 1469 PetscErrorCode ierr; 1470 /* prefixes stuff */ 1471 char dir_prefix[256],neu_prefix[256],str_level[16]; 1472 size_t len; 1473 1474 PetscFunctionBegin; 1475 1476 /* compute prefixes */ 1477 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1478 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1479 if (!pcbddc->current_level) { 1480 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1481 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1482 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1483 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1484 } else { 1485 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1486 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1487 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1488 len -= 15; /* remove "pc_bddc_coarse_" */ 1489 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1490 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1491 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1492 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1493 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1494 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1495 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1496 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1497 } 1498 1499 /* DIRICHLET PROBLEM */ 1500 if (dirichlet) { 1501 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1502 if (pcbddc->local_mat->symmetric_set) { 1503 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1504 } 1505 /* Matrix for Dirichlet problem is pcis->A_II */ 1506 n_D = pcis->n - pcis->n_B; 1507 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1508 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1509 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1510 /* default */ 1511 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1512 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1513 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1514 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1515 if (issbaij) { 1516 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1517 } else { 1518 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1519 } 1520 /* Allow user's customization */ 1521 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1522 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1523 } 1524 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1525 if (sub_schurs->reuse_mumps) { 1526 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1527 1528 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1529 } 1530 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1531 if (!n_D) { 1532 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1533 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1534 } 1535 /* Set Up KSP for Dirichlet problem of BDDC */ 1536 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1537 /* set ksp_D into pcis data */ 1538 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1539 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1540 pcis->ksp_D = pcbddc->ksp_D; 1541 } 1542 1543 /* NEUMANN PROBLEM */ 1544 A_RR = 0; 1545 if (neumann) { 1546 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1547 PetscInt ibs,mbs; 1548 PetscBool issbaij; 1549 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1550 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1551 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1552 if (pcbddc->ksp_R) { /* already created ksp */ 1553 PetscInt nn_R; 1554 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1555 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1556 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1557 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1558 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1559 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1560 reuse = MAT_INITIAL_MATRIX; 1561 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1562 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1563 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1564 reuse = MAT_INITIAL_MATRIX; 1565 } else { /* safe to reuse the matrix */ 1566 reuse = MAT_REUSE_MATRIX; 1567 } 1568 } 1569 /* last check */ 1570 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1571 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1572 reuse = MAT_INITIAL_MATRIX; 1573 } 1574 } else { /* first time, so we need to create the matrix */ 1575 reuse = MAT_INITIAL_MATRIX; 1576 } 1577 /* extract A_RR */ 1578 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1579 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1580 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1581 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1582 if (matis->A == pcbddc->local_mat) { 1583 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1584 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1585 } else { 1586 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1587 } 1588 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1589 if (matis->A == pcbddc->local_mat) { 1590 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1591 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1592 } else { 1593 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1594 } 1595 } 1596 if (!sub_schurs->reuse_mumps) { 1597 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1598 if (pcbddc->local_mat->symmetric_set) { 1599 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1600 } 1601 } else { 1602 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1603 1604 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1605 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1606 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1607 } 1608 if (!pcbddc->ksp_R) { /* create object if not present */ 1609 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1610 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1611 /* default */ 1612 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1613 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1614 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1615 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1616 if (issbaij) { 1617 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1618 } else { 1619 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1620 } 1621 /* Allow user's customization */ 1622 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1623 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1624 } 1625 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1626 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1627 if (!n_R) { 1628 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1629 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1630 } 1631 /* Reuse MUMPS solver if it is present */ 1632 if (sub_schurs->reuse_mumps) { 1633 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1634 1635 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1636 } 1637 /* Set Up KSP for Neumann problem of BDDC */ 1638 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1639 } 1640 /* free Neumann problem's matrix */ 1641 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1642 1643 if (pcbddc->dbg_flag) { 1644 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1645 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1646 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1647 } 1648 1649 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 1650 check_corr[0] = check_corr[1] = PETSC_FALSE; 1651 if (pcbddc->NullSpace_corr[0]) { 1652 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 1653 } 1654 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 1655 check_corr[0] = PETSC_TRUE; 1656 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 1657 } 1658 if (neumann && pcbddc->NullSpace_corr[2]) { 1659 check_corr[1] = PETSC_TRUE; 1660 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 1661 } 1662 1663 /* check Dirichlet and Neumann solvers */ 1664 if (pcbddc->dbg_flag) { 1665 if (dirichlet) { /* Dirichlet */ 1666 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1667 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1668 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1669 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1670 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1671 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1672 if (check_corr[0]) { 1673 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 1674 } 1675 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1676 } 1677 if (neumann) { /* Neumann */ 1678 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1679 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1680 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1681 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1682 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1683 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1684 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1685 if (check_corr[1]) { 1686 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 1687 } 1688 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1689 } 1690 } 1691 PetscFunctionReturn(0); 1692 } 1693 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1696 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1697 { 1698 PetscErrorCode ierr; 1699 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1700 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1701 1702 PetscFunctionBegin; 1703 if (!sub_schurs->reuse_mumps) { 1704 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1705 } 1706 if (!pcbddc->switch_static) { 1707 if (applytranspose && pcbddc->local_auxmat1) { 1708 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1709 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1710 } 1711 if (!sub_schurs->reuse_mumps) { 1712 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1713 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1714 } else { 1715 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1716 1717 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1718 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1719 } 1720 } else { 1721 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1722 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1723 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1724 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1725 if (applytranspose && pcbddc->local_auxmat1) { 1726 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1727 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1728 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1729 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1730 } 1731 } 1732 if (!sub_schurs->reuse_mumps) { 1733 if (applytranspose) { 1734 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1735 } else { 1736 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1737 } 1738 } else { 1739 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1740 1741 if (applytranspose) { 1742 ierr = MatFactorSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1743 } else { 1744 ierr = MatFactorSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1745 } 1746 } 1747 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1748 if (!pcbddc->switch_static) { 1749 if (!sub_schurs->reuse_mumps) { 1750 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1751 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1752 } else { 1753 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1754 1755 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1756 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1757 } 1758 if (!applytranspose && pcbddc->local_auxmat1) { 1759 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1760 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1761 } 1762 } else { 1763 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1764 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1765 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1766 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1767 if (!applytranspose && pcbddc->local_auxmat1) { 1768 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1769 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1770 } 1771 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1772 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1773 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1774 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1775 } 1776 PetscFunctionReturn(0); 1777 } 1778 1779 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1782 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1783 { 1784 PetscErrorCode ierr; 1785 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1786 PC_IS* pcis = (PC_IS*) (pc->data); 1787 const PetscScalar zero = 0.0; 1788 1789 PetscFunctionBegin; 1790 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1791 if (applytranspose) { 1792 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1793 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1794 } else { 1795 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1796 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1797 } 1798 /* start communications from local primal nodes to rhs of coarse solver */ 1799 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1800 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1801 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1802 1803 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1804 /* TODO remove null space when doing multilevel */ 1805 if (pcbddc->coarse_ksp) { 1806 Vec rhs,sol; 1807 1808 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1809 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1810 if (applytranspose) { 1811 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1812 } else { 1813 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1814 } 1815 } 1816 1817 /* Local solution on R nodes */ 1818 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1819 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1820 } 1821 1822 /* communications from coarse sol to local primal nodes */ 1823 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1824 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1825 1826 /* Sum contributions from two levels */ 1827 if (applytranspose) { 1828 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1829 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1830 } else { 1831 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1832 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1833 } 1834 PetscFunctionReturn(0); 1835 } 1836 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1839 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1840 { 1841 PetscErrorCode ierr; 1842 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1843 PetscScalar *array; 1844 Vec from,to; 1845 1846 PetscFunctionBegin; 1847 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1848 from = pcbddc->coarse_vec; 1849 to = pcbddc->vec1_P; 1850 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1851 Vec tvec; 1852 1853 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1854 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1855 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1856 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1857 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1858 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1859 } 1860 } else { /* from local to global -> put data in coarse right hand side */ 1861 from = pcbddc->vec1_P; 1862 to = pcbddc->coarse_vec; 1863 } 1864 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1865 PetscFunctionReturn(0); 1866 } 1867 1868 #undef __FUNCT__ 1869 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1870 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1871 { 1872 PetscErrorCode ierr; 1873 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1874 PetscScalar *array; 1875 Vec from,to; 1876 1877 PetscFunctionBegin; 1878 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1879 from = pcbddc->coarse_vec; 1880 to = pcbddc->vec1_P; 1881 } else { /* from local to global -> put data in coarse right hand side */ 1882 from = pcbddc->vec1_P; 1883 to = pcbddc->coarse_vec; 1884 } 1885 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1886 if (smode == SCATTER_FORWARD) { 1887 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1888 Vec tvec; 1889 1890 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1891 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1892 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1893 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1894 } 1895 } else { 1896 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1897 ierr = VecResetArray(from);CHKERRQ(ierr); 1898 } 1899 } 1900 PetscFunctionReturn(0); 1901 } 1902 1903 /* uncomment for testing purposes */ 1904 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1905 #undef __FUNCT__ 1906 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1907 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1908 { 1909 PetscErrorCode ierr; 1910 PC_IS* pcis = (PC_IS*)(pc->data); 1911 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1912 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1913 /* one and zero */ 1914 PetscScalar one=1.0,zero=0.0; 1915 /* space to store constraints and their local indices */ 1916 PetscScalar *constraints_data; 1917 PetscInt *constraints_idxs,*constraints_idxs_B; 1918 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1919 PetscInt *constraints_n; 1920 /* iterators */ 1921 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1922 /* BLAS integers */ 1923 PetscBLASInt lwork,lierr; 1924 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1925 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1926 /* reuse */ 1927 PetscInt olocal_primal_size,olocal_primal_size_cc; 1928 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1929 /* change of basis */ 1930 PetscBool qr_needed; 1931 PetscBT change_basis,qr_needed_idx; 1932 /* auxiliary stuff */ 1933 PetscInt *nnz,*is_indices; 1934 PetscInt ncc; 1935 /* some quantities */ 1936 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1937 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1938 1939 PetscFunctionBegin; 1940 /* Destroy Mat objects computed previously */ 1941 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1942 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1943 /* save info on constraints from previous setup (if any) */ 1944 olocal_primal_size = pcbddc->local_primal_size; 1945 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1946 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1947 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1948 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1949 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1950 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1951 1952 /* print some info */ 1953 if (pcbddc->dbg_flag) { 1954 IS vertices; 1955 PetscInt nv,nedges,nfaces; 1956 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1957 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1958 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1959 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1960 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1961 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1962 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1963 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1964 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1965 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1966 } 1967 1968 if (!pcbddc->adaptive_selection) { 1969 IS ISForVertices,*ISForFaces,*ISForEdges; 1970 MatNullSpace nearnullsp; 1971 const Vec *nearnullvecs; 1972 Vec *localnearnullsp; 1973 PetscScalar *array; 1974 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1975 PetscBool nnsp_has_cnst; 1976 /* LAPACK working arrays for SVD or POD */ 1977 PetscBool skip_lapack,boolforchange; 1978 PetscScalar *work; 1979 PetscReal *singular_vals; 1980 #if defined(PETSC_USE_COMPLEX) 1981 PetscReal *rwork; 1982 #endif 1983 #if defined(PETSC_MISSING_LAPACK_GESVD) 1984 PetscScalar *temp_basis,*correlation_mat; 1985 #else 1986 PetscBLASInt dummy_int=1; 1987 PetscScalar dummy_scalar=1.; 1988 #endif 1989 1990 /* Get index sets for faces, edges and vertices from graph */ 1991 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1992 /* free unneeded index sets */ 1993 if (!pcbddc->use_vertices) { 1994 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1995 } 1996 if (!pcbddc->use_edges) { 1997 for (i=0;i<n_ISForEdges;i++) { 1998 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1999 } 2000 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2001 n_ISForEdges = 0; 2002 } 2003 if (!pcbddc->use_faces) { 2004 for (i=0;i<n_ISForFaces;i++) { 2005 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2006 } 2007 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2008 n_ISForFaces = 0; 2009 } 2010 2011 /* check if near null space is attached to global mat */ 2012 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2013 if (nearnullsp) { 2014 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2015 /* remove any stored info */ 2016 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2017 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2018 /* store information for BDDC solver reuse */ 2019 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2020 pcbddc->onearnullspace = nearnullsp; 2021 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2022 for (i=0;i<nnsp_size;i++) { 2023 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2024 } 2025 } else { /* if near null space is not provided BDDC uses constants by default */ 2026 nnsp_size = 0; 2027 nnsp_has_cnst = PETSC_TRUE; 2028 } 2029 /* get max number of constraints on a single cc */ 2030 max_constraints = nnsp_size; 2031 if (nnsp_has_cnst) max_constraints++; 2032 2033 /* 2034 Evaluate maximum storage size needed by the procedure 2035 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2036 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2037 There can be multiple constraints per connected component 2038 */ 2039 n_vertices = 0; 2040 if (ISForVertices) { 2041 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2042 } 2043 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2044 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2045 2046 total_counts = n_ISForFaces+n_ISForEdges; 2047 total_counts *= max_constraints; 2048 total_counts += n_vertices; 2049 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2050 2051 total_counts = 0; 2052 max_size_of_constraint = 0; 2053 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2054 IS used_is; 2055 if (i<n_ISForEdges) { 2056 used_is = ISForEdges[i]; 2057 } else { 2058 used_is = ISForFaces[i-n_ISForEdges]; 2059 } 2060 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2061 total_counts += j; 2062 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2063 } 2064 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2065 2066 /* get local part of global near null space vectors */ 2067 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2068 for (k=0;k<nnsp_size;k++) { 2069 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2070 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2071 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2072 } 2073 2074 /* whether or not to skip lapack calls */ 2075 skip_lapack = PETSC_TRUE; 2076 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2077 2078 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2079 if (!skip_lapack) { 2080 PetscScalar temp_work; 2081 2082 #if defined(PETSC_MISSING_LAPACK_GESVD) 2083 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2084 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2085 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2086 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2087 #if defined(PETSC_USE_COMPLEX) 2088 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2089 #endif 2090 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2091 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2092 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2093 lwork = -1; 2094 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2095 #if !defined(PETSC_USE_COMPLEX) 2096 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2097 #else 2098 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2099 #endif 2100 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2101 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2102 #else /* on missing GESVD */ 2103 /* SVD */ 2104 PetscInt max_n,min_n; 2105 max_n = max_size_of_constraint; 2106 min_n = max_constraints; 2107 if (max_size_of_constraint < max_constraints) { 2108 min_n = max_size_of_constraint; 2109 max_n = max_constraints; 2110 } 2111 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2112 #if defined(PETSC_USE_COMPLEX) 2113 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2114 #endif 2115 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2116 lwork = -1; 2117 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2118 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2119 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2120 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2121 #if !defined(PETSC_USE_COMPLEX) 2122 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2123 #else 2124 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2125 #endif 2126 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2127 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2128 #endif /* on missing GESVD */ 2129 /* Allocate optimal workspace */ 2130 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2131 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2132 } 2133 /* Now we can loop on constraining sets */ 2134 total_counts = 0; 2135 constraints_idxs_ptr[0] = 0; 2136 constraints_data_ptr[0] = 0; 2137 /* vertices */ 2138 if (n_vertices) { 2139 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2140 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2141 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2142 for (i=0;i<n_vertices;i++) { 2143 constraints_n[total_counts] = 1; 2144 constraints_data[total_counts] = 1.0; 2145 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2146 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2147 total_counts++; 2148 } 2149 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2150 PetscBool used_vertex; 2151 for (i=0;i<n_vertices;i++) { 2152 used_vertex = PETSC_FALSE; 2153 k = 0; 2154 while (!used_vertex && k<nnsp_size) { 2155 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2156 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2157 constraints_n[total_counts] = 1; 2158 constraints_idxs[total_counts] = is_indices[i]; 2159 constraints_data[total_counts] = 1.0; 2160 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2161 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2162 total_counts++; 2163 used_vertex = PETSC_TRUE; 2164 } 2165 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2166 k++; 2167 } 2168 } 2169 } 2170 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2171 n_vertices = total_counts; 2172 } 2173 2174 /* edges and faces */ 2175 total_counts_cc = total_counts; 2176 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2177 IS used_is; 2178 PetscBool idxs_copied = PETSC_FALSE; 2179 2180 if (ncc<n_ISForEdges) { 2181 used_is = ISForEdges[ncc]; 2182 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2183 } else { 2184 used_is = ISForFaces[ncc-n_ISForEdges]; 2185 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2186 } 2187 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2188 2189 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2190 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2191 /* change of basis should not be performed on local periodic nodes */ 2192 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2193 if (nnsp_has_cnst) { 2194 PetscScalar quad_value; 2195 2196 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2197 idxs_copied = PETSC_TRUE; 2198 2199 if (!pcbddc->use_nnsp_true) { 2200 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2201 } else { 2202 quad_value = 1.0; 2203 } 2204 for (j=0;j<size_of_constraint;j++) { 2205 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2206 } 2207 temp_constraints++; 2208 total_counts++; 2209 } 2210 for (k=0;k<nnsp_size;k++) { 2211 PetscReal real_value; 2212 PetscScalar *ptr_to_data; 2213 2214 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2215 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2216 for (j=0;j<size_of_constraint;j++) { 2217 ptr_to_data[j] = array[is_indices[j]]; 2218 } 2219 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2220 /* check if array is null on the connected component */ 2221 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2222 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2223 if (real_value > 0.0) { /* keep indices and values */ 2224 temp_constraints++; 2225 total_counts++; 2226 if (!idxs_copied) { 2227 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2228 idxs_copied = PETSC_TRUE; 2229 } 2230 } 2231 } 2232 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2233 valid_constraints = temp_constraints; 2234 if (!pcbddc->use_nnsp_true && temp_constraints) { 2235 if (temp_constraints == 1) { /* just normalize the constraint */ 2236 PetscScalar norm,*ptr_to_data; 2237 2238 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2239 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2240 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2241 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2242 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2243 } else { /* perform SVD */ 2244 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2245 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2246 2247 #if defined(PETSC_MISSING_LAPACK_GESVD) 2248 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2249 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2250 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2251 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2252 from that computed using LAPACKgesvd 2253 -> This is due to a different computation of eigenvectors in LAPACKheev 2254 -> The quality of the POD-computed basis will be the same */ 2255 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2256 /* Store upper triangular part of correlation matrix */ 2257 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2258 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2259 for (j=0;j<temp_constraints;j++) { 2260 for (k=0;k<j+1;k++) { 2261 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2262 } 2263 } 2264 /* compute eigenvalues and eigenvectors of correlation matrix */ 2265 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2266 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2267 #if !defined(PETSC_USE_COMPLEX) 2268 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2269 #else 2270 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2271 #endif 2272 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2273 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2274 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2275 j = 0; 2276 while (j < temp_constraints && singular_vals[j] < tol) j++; 2277 total_counts = total_counts-j; 2278 valid_constraints = temp_constraints-j; 2279 /* scale and copy POD basis into used quadrature memory */ 2280 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2281 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2282 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2283 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2284 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2285 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2286 if (j<temp_constraints) { 2287 PetscInt ii; 2288 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2289 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2290 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2291 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2292 for (k=0;k<temp_constraints-j;k++) { 2293 for (ii=0;ii<size_of_constraint;ii++) { 2294 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2295 } 2296 } 2297 } 2298 #else /* on missing GESVD */ 2299 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2300 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2301 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2302 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2303 #if !defined(PETSC_USE_COMPLEX) 2304 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2305 #else 2306 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2307 #endif 2308 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2309 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2310 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2311 k = temp_constraints; 2312 if (k > size_of_constraint) k = size_of_constraint; 2313 j = 0; 2314 while (j < k && singular_vals[k-j-1] < tol) j++; 2315 valid_constraints = k-j; 2316 total_counts = total_counts-temp_constraints+valid_constraints; 2317 #endif /* on missing GESVD */ 2318 } 2319 } 2320 /* update pointers information */ 2321 if (valid_constraints) { 2322 constraints_n[total_counts_cc] = valid_constraints; 2323 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2324 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2325 /* set change_of_basis flag */ 2326 if (boolforchange) { 2327 PetscBTSet(change_basis,total_counts_cc); 2328 } 2329 total_counts_cc++; 2330 } 2331 } 2332 /* free workspace */ 2333 if (!skip_lapack) { 2334 ierr = PetscFree(work);CHKERRQ(ierr); 2335 #if defined(PETSC_USE_COMPLEX) 2336 ierr = PetscFree(rwork);CHKERRQ(ierr); 2337 #endif 2338 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2339 #if defined(PETSC_MISSING_LAPACK_GESVD) 2340 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2341 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2342 #endif 2343 } 2344 for (k=0;k<nnsp_size;k++) { 2345 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2346 } 2347 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2348 /* free index sets of faces, edges and vertices */ 2349 for (i=0;i<n_ISForFaces;i++) { 2350 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2351 } 2352 if (n_ISForFaces) { 2353 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2354 } 2355 for (i=0;i<n_ISForEdges;i++) { 2356 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2357 } 2358 if (n_ISForEdges) { 2359 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2360 } 2361 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2362 } else { 2363 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2364 2365 total_counts = 0; 2366 n_vertices = 0; 2367 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2368 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2369 } 2370 max_constraints = 0; 2371 total_counts_cc = 0; 2372 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2373 total_counts += pcbddc->adaptive_constraints_n[i]; 2374 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2375 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2376 } 2377 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2378 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2379 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2380 constraints_data = pcbddc->adaptive_constraints_data; 2381 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2382 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2383 total_counts_cc = 0; 2384 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2385 if (pcbddc->adaptive_constraints_n[i]) { 2386 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2387 } 2388 } 2389 #if 0 2390 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2391 for (i=0;i<total_counts_cc;i++) { 2392 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2393 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2394 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2395 printf(" %d",constraints_idxs[j]); 2396 } 2397 printf("\n"); 2398 printf("number of cc: %d\n",constraints_n[i]); 2399 } 2400 for (i=0;i<n_vertices;i++) { 2401 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2402 } 2403 for (i=0;i<sub_schurs->n_subs;i++) { 2404 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2405 } 2406 #endif 2407 2408 max_size_of_constraint = 0; 2409 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2410 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2411 /* Change of basis */ 2412 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2413 if (pcbddc->use_change_of_basis) { 2414 for (i=0;i<sub_schurs->n_subs;i++) { 2415 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2416 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2417 } 2418 } 2419 } 2420 } 2421 pcbddc->local_primal_size = total_counts; 2422 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2423 2424 /* map constraints_idxs in boundary numbering */ 2425 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2426 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 2427 2428 /* Create constraint matrix */ 2429 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2430 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2431 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2432 2433 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2434 /* determine if a QR strategy is needed for change of basis */ 2435 qr_needed = PETSC_FALSE; 2436 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2437 total_primal_vertices=0; 2438 pcbddc->local_primal_size_cc = 0; 2439 for (i=0;i<total_counts_cc;i++) { 2440 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2441 if (size_of_constraint == 1) { 2442 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2443 pcbddc->local_primal_size_cc += 1; 2444 } else if (PetscBTLookup(change_basis,i)) { 2445 for (k=0;k<constraints_n[i];k++) { 2446 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2447 } 2448 pcbddc->local_primal_size_cc += constraints_n[i]; 2449 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2450 PetscBTSet(qr_needed_idx,i); 2451 qr_needed = PETSC_TRUE; 2452 } 2453 } else { 2454 pcbddc->local_primal_size_cc += 1; 2455 } 2456 } 2457 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2458 pcbddc->n_vertices = total_primal_vertices; 2459 /* permute indices in order to have a sorted set of vertices */ 2460 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2461 2462 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2463 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2464 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2465 2466 /* nonzero structure of constraint matrix */ 2467 /* and get reference dof for local constraints */ 2468 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2469 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2470 2471 j = total_primal_vertices; 2472 total_counts = total_primal_vertices; 2473 cum = total_primal_vertices; 2474 for (i=n_vertices;i<total_counts_cc;i++) { 2475 if (!PetscBTLookup(change_basis,i)) { 2476 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2477 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2478 cum++; 2479 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2480 for (k=0;k<constraints_n[i];k++) { 2481 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2482 nnz[j+k] = size_of_constraint; 2483 } 2484 j += constraints_n[i]; 2485 } 2486 } 2487 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2488 ierr = PetscFree(nnz);CHKERRQ(ierr); 2489 2490 /* set values in constraint matrix */ 2491 for (i=0;i<total_primal_vertices;i++) { 2492 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2493 } 2494 total_counts = total_primal_vertices; 2495 for (i=n_vertices;i<total_counts_cc;i++) { 2496 if (!PetscBTLookup(change_basis,i)) { 2497 PetscInt *cols; 2498 2499 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2500 cols = constraints_idxs+constraints_idxs_ptr[i]; 2501 for (k=0;k<constraints_n[i];k++) { 2502 PetscInt row = total_counts+k; 2503 PetscScalar *vals; 2504 2505 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2506 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2507 } 2508 total_counts += constraints_n[i]; 2509 } 2510 } 2511 /* assembling */ 2512 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2513 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2514 2515 /* 2516 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2517 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2518 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 2519 */ 2520 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2521 if (pcbddc->use_change_of_basis) { 2522 /* dual and primal dofs on a single cc */ 2523 PetscInt dual_dofs,primal_dofs; 2524 /* working stuff for GEQRF */ 2525 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2526 PetscBLASInt lqr_work; 2527 /* working stuff for UNGQR */ 2528 PetscScalar *gqr_work,lgqr_work_t; 2529 PetscBLASInt lgqr_work; 2530 /* working stuff for TRTRS */ 2531 PetscScalar *trs_rhs; 2532 PetscBLASInt Blas_NRHS; 2533 /* pointers for values insertion into change of basis matrix */ 2534 PetscInt *start_rows,*start_cols; 2535 PetscScalar *start_vals; 2536 /* working stuff for values insertion */ 2537 PetscBT is_primal; 2538 PetscInt *aux_primal_numbering_B; 2539 /* matrix sizes */ 2540 PetscInt global_size,local_size; 2541 /* temporary change of basis */ 2542 Mat localChangeOfBasisMatrix; 2543 /* extra space for debugging */ 2544 PetscScalar *dbg_work; 2545 2546 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2547 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2548 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2549 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2550 /* nonzeros for local mat */ 2551 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2552 for (i=0;i<pcis->n;i++) nnz[i]=1; 2553 for (i=n_vertices;i<total_counts_cc;i++) { 2554 if (PetscBTLookup(change_basis,i)) { 2555 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2556 if (PetscBTLookup(qr_needed_idx,i)) { 2557 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2558 } else { 2559 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2560 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2561 } 2562 } 2563 } 2564 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2565 ierr = PetscFree(nnz);CHKERRQ(ierr); 2566 /* Set initial identity in the matrix */ 2567 for (i=0;i<pcis->n;i++) { 2568 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2569 } 2570 2571 if (pcbddc->dbg_flag) { 2572 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2573 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2574 } 2575 2576 2577 /* Now we loop on the constraints which need a change of basis */ 2578 /* 2579 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2580 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2581 2582 Basic blocks of change of basis matrix T computed by 2583 2584 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2585 2586 | 1 0 ... 0 s_1/S | 2587 | 0 1 ... 0 s_2/S | 2588 | ... | 2589 | 0 ... 1 s_{n-1}/S | 2590 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2591 2592 with S = \sum_{i=1}^n s_i^2 2593 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2594 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2595 2596 - QR decomposition of constraints otherwise 2597 */ 2598 if (qr_needed) { 2599 /* space to store Q */ 2600 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2601 /* first we issue queries for optimal work */ 2602 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2603 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2604 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2605 lqr_work = -1; 2606 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2607 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2608 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2609 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2610 lgqr_work = -1; 2611 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2612 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2613 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2614 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2615 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2616 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2617 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2618 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2619 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2620 /* array to store scaling factors for reflectors */ 2621 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2622 /* array to store rhs and solution of triangular solver */ 2623 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2624 /* allocating workspace for check */ 2625 if (pcbddc->dbg_flag) { 2626 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2627 } 2628 } 2629 /* array to store whether a node is primal or not */ 2630 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2631 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2632 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2633 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 2634 for (i=0;i<total_primal_vertices;i++) { 2635 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2636 } 2637 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2638 2639 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2640 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2641 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2642 if (PetscBTLookup(change_basis,total_counts)) { 2643 /* get constraint info */ 2644 primal_dofs = constraints_n[total_counts]; 2645 dual_dofs = size_of_constraint-primal_dofs; 2646 2647 if (pcbddc->dbg_flag) { 2648 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2649 } 2650 2651 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2652 2653 /* copy quadrature constraints for change of basis check */ 2654 if (pcbddc->dbg_flag) { 2655 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2656 } 2657 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2658 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2659 2660 /* compute QR decomposition of constraints */ 2661 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2662 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2663 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2664 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2665 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2666 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2667 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2668 2669 /* explictly compute R^-T */ 2670 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2671 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2672 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2673 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2674 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2675 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2676 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2677 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2678 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2679 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2680 2681 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2682 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2683 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2684 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2685 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2686 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2687 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2688 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2689 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2690 2691 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2692 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2693 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2694 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2695 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2696 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2697 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2698 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2699 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2700 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2701 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2702 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2703 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2704 2705 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2706 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2707 /* insert cols for primal dofs */ 2708 for (j=0;j<primal_dofs;j++) { 2709 start_vals = &qr_basis[j*size_of_constraint]; 2710 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2711 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2712 } 2713 /* insert cols for dual dofs */ 2714 for (j=0,k=0;j<dual_dofs;k++) { 2715 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2716 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2717 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2718 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2719 j++; 2720 } 2721 } 2722 2723 /* check change of basis */ 2724 if (pcbddc->dbg_flag) { 2725 PetscInt ii,jj; 2726 PetscBool valid_qr=PETSC_TRUE; 2727 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2728 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2729 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2730 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2731 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2732 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2733 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2734 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2735 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2736 for (jj=0;jj<size_of_constraint;jj++) { 2737 for (ii=0;ii<primal_dofs;ii++) { 2738 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2739 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2740 } 2741 } 2742 if (!valid_qr) { 2743 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2744 for (jj=0;jj<size_of_constraint;jj++) { 2745 for (ii=0;ii<primal_dofs;ii++) { 2746 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2747 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2748 } 2749 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2750 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2751 } 2752 } 2753 } 2754 } else { 2755 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2756 } 2757 } 2758 } else { /* simple transformation block */ 2759 PetscInt row,col; 2760 PetscScalar val,norm; 2761 2762 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2763 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2764 for (j=0;j<size_of_constraint;j++) { 2765 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2766 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2767 if (!PetscBTLookup(is_primal,row_B)) { 2768 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2769 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2770 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2771 } else { 2772 for (k=0;k<size_of_constraint;k++) { 2773 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2774 if (row != col) { 2775 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2776 } else { 2777 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2778 } 2779 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2780 } 2781 } 2782 } 2783 if (pcbddc->dbg_flag) { 2784 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2785 } 2786 } 2787 } else { 2788 if (pcbddc->dbg_flag) { 2789 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2790 } 2791 } 2792 } 2793 2794 /* free workspace */ 2795 if (qr_needed) { 2796 if (pcbddc->dbg_flag) { 2797 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2798 } 2799 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2800 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2801 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2802 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2803 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2804 } 2805 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2806 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2807 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2808 2809 /* assembling of global change of variable */ 2810 { 2811 Mat tmat; 2812 PetscInt bs; 2813 2814 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2815 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2816 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2817 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2818 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2819 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2820 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2821 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2822 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2823 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2824 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2825 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2826 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2827 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2828 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2829 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2830 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2831 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2832 } 2833 /* check */ 2834 if (pcbddc->dbg_flag) { 2835 PetscReal error; 2836 Vec x,x_change; 2837 2838 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2839 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2840 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2841 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2842 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2843 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2844 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2845 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2846 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2847 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2848 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2849 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2850 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2851 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2852 ierr = VecDestroy(&x);CHKERRQ(ierr); 2853 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2854 } 2855 2856 /* adapt sub_schurs computed (if any) */ 2857 if (pcbddc->use_deluxe_scaling) { 2858 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2859 if (sub_schurs->S_Ej_all) { 2860 Mat S_new,tmat; 2861 IS is_all_N; 2862 2863 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2864 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2865 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2866 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2867 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2868 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2869 sub_schurs->S_Ej_all = S_new; 2870 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2871 if (sub_schurs->sum_S_Ej_all) { 2872 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2873 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2874 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2875 sub_schurs->sum_S_Ej_all = S_new; 2876 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2877 } 2878 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2879 } 2880 } 2881 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2882 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2883 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2884 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2885 } 2886 2887 /* set up change of basis context */ 2888 if (pcbddc->ChangeOfBasisMatrix) { 2889 PCBDDCChange_ctx change_ctx; 2890 2891 if (!pcbddc->new_global_mat) { 2892 PetscInt global_size,local_size; 2893 2894 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2895 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2896 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2897 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2898 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2899 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2900 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2901 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2902 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2903 } else { 2904 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2905 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2906 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2907 } 2908 if (!pcbddc->user_ChangeOfBasisMatrix) { 2909 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2910 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2911 } else { 2912 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2913 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2914 } 2915 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2916 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2917 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2918 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2919 } 2920 2921 /* check if a new primal space has been introduced */ 2922 pcbddc->new_primal_space_local = PETSC_TRUE; 2923 if (olocal_primal_size == pcbddc->local_primal_size) { 2924 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2925 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2926 if (!pcbddc->new_primal_space_local) { 2927 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2928 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2929 } 2930 } 2931 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2932 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2933 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2934 2935 /* flush dbg viewer */ 2936 if (pcbddc->dbg_flag) { 2937 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2938 } 2939 2940 /* free workspace */ 2941 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2942 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2943 if (!pcbddc->adaptive_selection) { 2944 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2945 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2946 } else { 2947 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2948 pcbddc->adaptive_constraints_idxs_ptr, 2949 pcbddc->adaptive_constraints_data_ptr, 2950 pcbddc->adaptive_constraints_idxs, 2951 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2952 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2953 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2954 } 2955 PetscFunctionReturn(0); 2956 } 2957 2958 #undef __FUNCT__ 2959 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2960 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2961 { 2962 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2963 PC_IS *pcis = (PC_IS*)pc->data; 2964 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2965 PetscInt ierr,i,vertex_size,N; 2966 PetscViewer viewer=pcbddc->dbg_viewer; 2967 2968 PetscFunctionBegin; 2969 /* Reset previously computed graph */ 2970 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2971 /* Init local Graph struct */ 2972 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 2973 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 2974 2975 /* Check validity of the csr graph passed in by the user */ 2976 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2977 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2978 } 2979 2980 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2981 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2982 PetscInt *xadj,*adjncy; 2983 PetscInt nvtxs; 2984 PetscBool flg_row=PETSC_FALSE; 2985 2986 if (pcbddc->use_local_adj) { 2987 2988 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2989 if (flg_row) { 2990 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2991 pcbddc->computed_rowadj = PETSC_TRUE; 2992 } 2993 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2994 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 2995 IS is_dummy; 2996 ISLocalToGlobalMapping l2gmap_dummy; 2997 PetscInt j,sum; 2998 PetscInt *cxadj,*cadjncy; 2999 const PetscInt *idxs; 3000 PCBDDCGraph graph; 3001 PetscBT is_on_boundary; 3002 3003 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3004 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3005 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3006 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3007 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3008 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3009 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3010 if (flg_row) { 3011 graph->xadj = xadj; 3012 graph->adjncy = adjncy; 3013 } 3014 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3015 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3016 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3017 3018 if (pcbddc->dbg_flag) { 3019 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3020 for (i=0;i<graph->ncc;i++) { 3021 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3022 } 3023 } 3024 3025 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3026 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3027 for (i=0;i<pcis->n_B;i++) { 3028 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3029 } 3030 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3031 3032 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3033 sum = 0; 3034 for (i=0;i<graph->ncc;i++) { 3035 PetscInt sizecc = 0; 3036 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3037 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3038 sizecc++; 3039 } 3040 } 3041 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3042 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3043 cxadj[graph->queue[j]] = sizecc; 3044 } 3045 } 3046 sum += sizecc*sizecc; 3047 } 3048 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3049 sum = 0; 3050 for (i=0;i<pcis->n;i++) { 3051 PetscInt temp = cxadj[i]; 3052 cxadj[i] = sum; 3053 sum += temp; 3054 } 3055 cxadj[pcis->n] = sum; 3056 for (i=0;i<graph->ncc;i++) { 3057 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3058 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3059 PetscInt k,sizecc = 0; 3060 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3061 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3062 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3063 sizecc++; 3064 } 3065 } 3066 } 3067 } 3068 } 3069 if (sum) { 3070 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3071 } else { 3072 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3073 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3074 } 3075 graph->xadj = 0; 3076 graph->adjncy = 0; 3077 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3078 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3079 } 3080 } 3081 if (pcbddc->dbg_flag) { 3082 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3083 } 3084 3085 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3086 vertex_size = 1; 3087 if (pcbddc->user_provided_isfordofs) { 3088 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3089 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3090 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3091 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3092 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3093 } 3094 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3095 pcbddc->n_ISForDofs = 0; 3096 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3097 } 3098 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3099 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3100 } else { 3101 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3102 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3103 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3104 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3105 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3106 } 3107 } 3108 } 3109 3110 /* Setup of Graph */ 3111 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3112 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3113 } 3114 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3115 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3116 } 3117 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3118 3119 /* Graph's connected components analysis */ 3120 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3121 3122 /* print some info to stdout */ 3123 if (pcbddc->dbg_flag) { 3124 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3125 } 3126 3127 /* mark topography has done */ 3128 pcbddc->recompute_topography = PETSC_FALSE; 3129 PetscFunctionReturn(0); 3130 } 3131 3132 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3133 #undef __FUNCT__ 3134 #define __FUNCT__ "PCBDDCSubsetNumbering" 3135 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3136 { 3137 PetscSF sf; 3138 PetscLayout map; 3139 const PetscInt *idxs; 3140 PetscInt *leaf_data,*root_data,*gidxs; 3141 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3142 PetscInt n_n,nlocals,start,first_index; 3143 PetscMPIInt commsize; 3144 PetscBool first_found; 3145 PetscErrorCode ierr; 3146 3147 PetscFunctionBegin; 3148 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3149 if (subset_mult) { 3150 PetscCheckSameComm(subset,1,subset_mult,2); 3151 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3152 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3153 } 3154 /* create workspace layout for computing global indices of subset */ 3155 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3156 lbounds[0] = lbounds[1] = 0; 3157 for (i=0;i<n;i++) { 3158 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3159 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3160 } 3161 lbounds[0] = -lbounds[0]; 3162 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3163 gbounds[0] = -gbounds[0]; 3164 N = gbounds[1] - gbounds[0] + 1; 3165 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3166 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3167 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3168 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3169 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3170 3171 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3172 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3173 if (subset_mult) { 3174 const PetscInt* idxs_mult; 3175 3176 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3177 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3178 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3179 } else { 3180 for (i=0;i<n;i++) leaf_data[i] = 1; 3181 } 3182 /* local size of new subset */ 3183 n_n = 0; 3184 for (i=0;i<n;i++) n_n += leaf_data[i]; 3185 3186 /* global indexes in layout */ 3187 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3188 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3189 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3190 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3191 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3192 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3193 3194 /* reduce from leaves to roots */ 3195 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3196 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3197 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3198 3199 /* count indexes in local part of layout */ 3200 nlocals = 0; 3201 first_index = -1; 3202 first_found = PETSC_FALSE; 3203 for (i=0;i<Nl;i++) { 3204 if (!first_found && root_data[i]) { 3205 first_found = PETSC_TRUE; 3206 first_index = i; 3207 } 3208 nlocals += root_data[i]; 3209 } 3210 3211 /* cumulative of number of indexes and size of subset without holes */ 3212 #if defined(PETSC_HAVE_MPI_EXSCAN) 3213 start = 0; 3214 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3215 #else 3216 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3217 start = start-nlocals; 3218 #endif 3219 3220 if (N_n) { /* compute total size of new subset if requested */ 3221 *N_n = start + nlocals; 3222 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3223 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3224 } 3225 3226 /* adapt root data with cumulative */ 3227 if (first_found) { 3228 PetscInt old_index; 3229 3230 root_data[first_index] += start; 3231 old_index = first_index; 3232 for (i=first_index+1;i<Nl;i++) { 3233 if (root_data[i]) { 3234 root_data[i] += root_data[old_index]; 3235 old_index = i; 3236 } 3237 } 3238 } 3239 3240 /* from roots to leaves */ 3241 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3242 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3243 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3244 3245 /* create new IS with global indexes without holes */ 3246 if (subset_mult) { 3247 const PetscInt* idxs_mult; 3248 PetscInt cum; 3249 3250 cum = 0; 3251 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3252 for (i=0;i<n;i++) { 3253 PetscInt j; 3254 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3255 } 3256 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3257 } else { 3258 for (i=0;i<n;i++) { 3259 gidxs[i] = leaf_data[i]-1; 3260 } 3261 } 3262 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3263 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3264 PetscFunctionReturn(0); 3265 } 3266 3267 #undef __FUNCT__ 3268 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3269 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3270 { 3271 PetscInt i,j; 3272 PetscScalar *alphas; 3273 PetscErrorCode ierr; 3274 3275 PetscFunctionBegin; 3276 /* this implements stabilized Gram-Schmidt */ 3277 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3278 for (i=0;i<n;i++) { 3279 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3280 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3281 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3282 } 3283 ierr = PetscFree(alphas);CHKERRQ(ierr); 3284 PetscFunctionReturn(0); 3285 } 3286 3287 #undef __FUNCT__ 3288 #define __FUNCT__ "MatISGetSubassemblingPattern" 3289 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3290 { 3291 IS ranks_send_to; 3292 PetscInt n_neighs,*neighs,*n_shared,**shared; 3293 PetscMPIInt size,rank,color; 3294 PetscInt *xadj,*adjncy; 3295 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3296 PetscInt i,local_size,threshold=0; 3297 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3298 PetscSubcomm subcomm; 3299 PetscErrorCode ierr; 3300 3301 PetscFunctionBegin; 3302 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3303 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3304 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3305 3306 /* Get info on mapping */ 3307 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3308 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3309 3310 /* build local CSR graph of subdomains' connectivity */ 3311 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3312 xadj[0] = 0; 3313 xadj[1] = PetscMax(n_neighs-1,0); 3314 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3315 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3316 3317 if (threshold) { 3318 PetscInt xadj_count = 0; 3319 for (i=1;i<n_neighs;i++) { 3320 if (n_shared[i] > threshold) { 3321 adjncy[xadj_count] = neighs[i]; 3322 adjncy_wgt[xadj_count] = n_shared[i]; 3323 xadj_count++; 3324 } 3325 } 3326 xadj[1] = xadj_count; 3327 } else { 3328 if (xadj[1]) { 3329 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3330 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3331 } 3332 } 3333 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3334 if (use_square) { 3335 for (i=0;i<xadj[1];i++) { 3336 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3337 } 3338 } 3339 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3340 3341 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3342 3343 /* 3344 Restrict work on active processes only. 3345 */ 3346 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3347 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3348 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3349 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3350 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3351 if (color) { 3352 ierr = PetscFree(xadj);CHKERRQ(ierr); 3353 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3354 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3355 } else { 3356 Mat subdomain_adj; 3357 IS new_ranks,new_ranks_contig; 3358 MatPartitioning partitioner; 3359 PetscInt prank,rstart=0,rend=0; 3360 PetscInt *is_indices,*oldranks; 3361 PetscBool aggregate; 3362 3363 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3364 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3365 prank = rank; 3366 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3367 /* 3368 for (i=0;i<size;i++) { 3369 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3370 } 3371 */ 3372 for (i=0;i<xadj[1];i++) { 3373 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3374 } 3375 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3376 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3377 if (aggregate) { 3378 PetscInt lrows,row,ncols,*cols; 3379 PetscMPIInt nrank; 3380 PetscScalar *vals; 3381 3382 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3383 lrows = 0; 3384 if (nrank<redprocs) { 3385 lrows = size/redprocs; 3386 if (nrank<size%redprocs) lrows++; 3387 } 3388 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3389 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3390 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3391 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3392 row = nrank; 3393 ncols = xadj[1]-xadj[0]; 3394 cols = adjncy; 3395 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3396 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3397 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3398 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3399 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3400 ierr = PetscFree(xadj);CHKERRQ(ierr); 3401 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3402 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3403 ierr = PetscFree(vals);CHKERRQ(ierr); 3404 } else { 3405 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3406 } 3407 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3408 3409 /* Partition */ 3410 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3411 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3412 if (use_vwgt) { 3413 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3414 v_wgt[0] = local_size; 3415 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3416 } 3417 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3418 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3419 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3420 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3421 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3422 3423 /* renumber new_ranks to avoid "holes" in new set of processors */ 3424 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3425 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3426 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3427 if (!redprocs) { 3428 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3429 } else { 3430 PetscInt idxs[1]; 3431 PetscMPIInt tag; 3432 MPI_Request *reqs; 3433 3434 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3435 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3436 for (i=rstart;i<rend;i++) { 3437 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3438 } 3439 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3440 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3441 ierr = PetscFree(reqs);CHKERRQ(ierr); 3442 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3443 } 3444 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3445 /* clean up */ 3446 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3447 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3448 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3449 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3450 } 3451 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3452 3453 /* assemble parallel IS for sends */ 3454 i = 1; 3455 if (color) i=0; 3456 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3457 /* get back IS */ 3458 *is_sends = ranks_send_to; 3459 PetscFunctionReturn(0); 3460 } 3461 3462 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3463 3464 #undef __FUNCT__ 3465 #define __FUNCT__ "MatISSubassemble" 3466 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3467 { 3468 Mat local_mat; 3469 IS is_sends_internal; 3470 PetscInt rows,cols,new_local_rows; 3471 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3472 PetscBool ismatis,isdense,newisdense,destroy_mat; 3473 ISLocalToGlobalMapping l2gmap; 3474 PetscInt* l2gmap_indices; 3475 const PetscInt* is_indices; 3476 MatType new_local_type; 3477 /* buffers */ 3478 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3479 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3480 PetscInt *recv_buffer_idxs_local; 3481 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3482 /* MPI */ 3483 MPI_Comm comm,comm_n; 3484 PetscSubcomm subcomm; 3485 PetscMPIInt n_sends,n_recvs,commsize; 3486 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3487 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3488 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3489 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3490 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3491 PetscErrorCode ierr; 3492 3493 PetscFunctionBegin; 3494 /* TODO: add missing checks */ 3495 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3496 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3497 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3498 PetscValidLogicalCollectiveInt(mat,nis,7); 3499 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3500 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3501 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3502 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3503 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3504 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3505 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3506 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3507 PetscInt mrows,mcols,mnrows,mncols; 3508 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3509 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3510 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3511 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3512 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3513 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3514 } 3515 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3516 PetscValidLogicalCollectiveInt(mat,bs,0); 3517 /* prepare IS for sending if not provided */ 3518 if (!is_sends) { 3519 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3520 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3521 } else { 3522 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3523 is_sends_internal = is_sends; 3524 } 3525 3526 /* get comm */ 3527 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3528 3529 /* compute number of sends */ 3530 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3531 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3532 3533 /* compute number of receives */ 3534 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3535 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3536 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3537 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3538 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3539 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3540 ierr = PetscFree(iflags);CHKERRQ(ierr); 3541 3542 /* restrict comm if requested */ 3543 subcomm = 0; 3544 destroy_mat = PETSC_FALSE; 3545 if (restrict_comm) { 3546 PetscMPIInt color,subcommsize; 3547 3548 color = 0; 3549 if (restrict_full) { 3550 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3551 } else { 3552 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3553 } 3554 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3555 subcommsize = commsize - subcommsize; 3556 /* check if reuse has been requested */ 3557 if (reuse == MAT_REUSE_MATRIX) { 3558 if (*mat_n) { 3559 PetscMPIInt subcommsize2; 3560 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3561 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3562 comm_n = PetscObjectComm((PetscObject)*mat_n); 3563 } else { 3564 comm_n = PETSC_COMM_SELF; 3565 } 3566 } else { /* MAT_INITIAL_MATRIX */ 3567 PetscMPIInt rank; 3568 3569 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3570 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3571 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3572 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3573 comm_n = PetscSubcommChild(subcomm); 3574 } 3575 /* flag to destroy *mat_n if not significative */ 3576 if (color) destroy_mat = PETSC_TRUE; 3577 } else { 3578 comm_n = comm; 3579 } 3580 3581 /* prepare send/receive buffers */ 3582 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3583 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3584 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3585 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3586 if (nis) { 3587 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3588 } 3589 3590 /* Get data from local matrices */ 3591 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3592 /* TODO: See below some guidelines on how to prepare the local buffers */ 3593 /* 3594 send_buffer_vals should contain the raw values of the local matrix 3595 send_buffer_idxs should contain: 3596 - MatType_PRIVATE type 3597 - PetscInt size_of_l2gmap 3598 - PetscInt global_row_indices[size_of_l2gmap] 3599 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3600 */ 3601 else { 3602 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3603 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3604 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3605 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3606 send_buffer_idxs[1] = i; 3607 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3608 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3609 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3610 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3611 for (i=0;i<n_sends;i++) { 3612 ilengths_vals[is_indices[i]] = len*len; 3613 ilengths_idxs[is_indices[i]] = len+2; 3614 } 3615 } 3616 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3617 /* additional is (if any) */ 3618 if (nis) { 3619 PetscMPIInt psum; 3620 PetscInt j; 3621 for (j=0,psum=0;j<nis;j++) { 3622 PetscInt plen; 3623 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3624 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3625 psum += len+1; /* indices + lenght */ 3626 } 3627 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3628 for (j=0,psum=0;j<nis;j++) { 3629 PetscInt plen; 3630 const PetscInt *is_array_idxs; 3631 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3632 send_buffer_idxs_is[psum] = plen; 3633 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3634 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3635 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3636 psum += plen+1; /* indices + lenght */ 3637 } 3638 for (i=0;i<n_sends;i++) { 3639 ilengths_idxs_is[is_indices[i]] = psum; 3640 } 3641 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3642 } 3643 3644 buf_size_idxs = 0; 3645 buf_size_vals = 0; 3646 buf_size_idxs_is = 0; 3647 for (i=0;i<n_recvs;i++) { 3648 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3649 buf_size_vals += (PetscInt)olengths_vals[i]; 3650 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3651 } 3652 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3653 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3654 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3655 3656 /* get new tags for clean communications */ 3657 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3658 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3659 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3660 3661 /* allocate for requests */ 3662 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3663 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3664 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3665 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3666 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3667 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3668 3669 /* communications */ 3670 ptr_idxs = recv_buffer_idxs; 3671 ptr_vals = recv_buffer_vals; 3672 ptr_idxs_is = recv_buffer_idxs_is; 3673 for (i=0;i<n_recvs;i++) { 3674 source_dest = onodes[i]; 3675 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3676 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3677 ptr_idxs += olengths_idxs[i]; 3678 ptr_vals += olengths_vals[i]; 3679 if (nis) { 3680 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3681 ptr_idxs_is += olengths_idxs_is[i]; 3682 } 3683 } 3684 for (i=0;i<n_sends;i++) { 3685 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3686 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3687 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3688 if (nis) { 3689 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3690 } 3691 } 3692 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3693 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3694 3695 /* assemble new l2g map */ 3696 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3697 ptr_idxs = recv_buffer_idxs; 3698 new_local_rows = 0; 3699 for (i=0;i<n_recvs;i++) { 3700 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3701 ptr_idxs += olengths_idxs[i]; 3702 } 3703 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3704 ptr_idxs = recv_buffer_idxs; 3705 new_local_rows = 0; 3706 for (i=0;i<n_recvs;i++) { 3707 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3708 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3709 ptr_idxs += olengths_idxs[i]; 3710 } 3711 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3712 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3713 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3714 3715 /* infer new local matrix type from received local matrices type */ 3716 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3717 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3718 if (n_recvs) { 3719 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3720 ptr_idxs = recv_buffer_idxs; 3721 for (i=0;i<n_recvs;i++) { 3722 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3723 new_local_type_private = MATAIJ_PRIVATE; 3724 break; 3725 } 3726 ptr_idxs += olengths_idxs[i]; 3727 } 3728 switch (new_local_type_private) { 3729 case MATDENSE_PRIVATE: 3730 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3731 new_local_type = MATSEQAIJ; 3732 bs = 1; 3733 } else { /* if I receive only 1 dense matrix */ 3734 new_local_type = MATSEQDENSE; 3735 bs = 1; 3736 } 3737 break; 3738 case MATAIJ_PRIVATE: 3739 new_local_type = MATSEQAIJ; 3740 bs = 1; 3741 break; 3742 case MATBAIJ_PRIVATE: 3743 new_local_type = MATSEQBAIJ; 3744 break; 3745 case MATSBAIJ_PRIVATE: 3746 new_local_type = MATSEQSBAIJ; 3747 break; 3748 default: 3749 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3750 break; 3751 } 3752 } else { /* by default, new_local_type is seqdense */ 3753 new_local_type = MATSEQDENSE; 3754 bs = 1; 3755 } 3756 3757 /* create MATIS object if needed */ 3758 if (reuse == MAT_INITIAL_MATRIX) { 3759 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3760 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 3761 } else { 3762 /* it also destroys the local matrices */ 3763 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3764 } 3765 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3766 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3767 3768 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3769 3770 /* Global to local map of received indices */ 3771 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3772 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3773 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3774 3775 /* restore attributes -> type of incoming data and its size */ 3776 buf_size_idxs = 0; 3777 for (i=0;i<n_recvs;i++) { 3778 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3779 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3780 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3781 } 3782 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3783 3784 /* set preallocation */ 3785 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3786 if (!newisdense) { 3787 PetscInt *new_local_nnz=0; 3788 3789 ptr_idxs = recv_buffer_idxs_local; 3790 if (n_recvs) { 3791 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3792 } 3793 for (i=0;i<n_recvs;i++) { 3794 PetscInt j; 3795 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3796 for (j=0;j<*(ptr_idxs+1);j++) { 3797 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3798 } 3799 } else { 3800 /* TODO */ 3801 } 3802 ptr_idxs += olengths_idxs[i]; 3803 } 3804 if (new_local_nnz) { 3805 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3806 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3807 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3808 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3809 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3810 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3811 } else { 3812 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3813 } 3814 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3815 } else { 3816 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3817 } 3818 3819 /* set values */ 3820 ptr_vals = recv_buffer_vals; 3821 ptr_idxs = recv_buffer_idxs_local; 3822 for (i=0;i<n_recvs;i++) { 3823 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3824 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3825 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3826 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3827 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3828 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3829 } else { 3830 /* TODO */ 3831 } 3832 ptr_idxs += olengths_idxs[i]; 3833 ptr_vals += olengths_vals[i]; 3834 } 3835 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3836 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3837 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3838 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3839 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3840 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3841 3842 #if 0 3843 if (!restrict_comm) { /* check */ 3844 Vec lvec,rvec; 3845 PetscReal infty_error; 3846 3847 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3848 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3849 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3850 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3851 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3852 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3853 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3854 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3855 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3856 } 3857 #endif 3858 3859 /* assemble new additional is (if any) */ 3860 if (nis) { 3861 PetscInt **temp_idxs,*count_is,j,psum; 3862 3863 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3864 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3865 ptr_idxs = recv_buffer_idxs_is; 3866 psum = 0; 3867 for (i=0;i<n_recvs;i++) { 3868 for (j=0;j<nis;j++) { 3869 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3870 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3871 psum += plen; 3872 ptr_idxs += plen+1; /* shift pointer to received data */ 3873 } 3874 } 3875 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3876 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3877 for (i=1;i<nis;i++) { 3878 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3879 } 3880 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3881 ptr_idxs = recv_buffer_idxs_is; 3882 for (i=0;i<n_recvs;i++) { 3883 for (j=0;j<nis;j++) { 3884 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3885 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3886 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3887 ptr_idxs += plen+1; /* shift pointer to received data */ 3888 } 3889 } 3890 for (i=0;i<nis;i++) { 3891 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3892 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3893 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3894 } 3895 ierr = PetscFree(count_is);CHKERRQ(ierr); 3896 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3897 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3898 } 3899 /* free workspace */ 3900 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3901 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3902 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3903 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3904 if (isdense) { 3905 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3906 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3907 } else { 3908 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3909 } 3910 if (nis) { 3911 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3912 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3913 } 3914 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3915 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3916 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3917 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3918 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3919 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3920 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3921 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3922 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3923 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3924 ierr = PetscFree(onodes);CHKERRQ(ierr); 3925 if (nis) { 3926 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3927 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3928 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3929 } 3930 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3931 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3932 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3933 for (i=0;i<nis;i++) { 3934 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3935 } 3936 *mat_n = NULL; 3937 } 3938 PetscFunctionReturn(0); 3939 } 3940 3941 /* temporary hack into ksp private data structure */ 3942 #include <petsc/private/kspimpl.h> 3943 3944 #undef __FUNCT__ 3945 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3946 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3947 { 3948 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3949 PC_IS *pcis = (PC_IS*)pc->data; 3950 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3951 ISLocalToGlobalMapping coarse_islg; 3952 IS coarse_is,*isarray; 3953 PetscInt i,im_active=-1,active_procs=-1; 3954 PetscInt nis,nisdofs,nisneu; 3955 PC pc_temp; 3956 PCType coarse_pc_type; 3957 KSPType coarse_ksp_type; 3958 PetscBool multilevel_requested,multilevel_allowed; 3959 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3960 Mat t_coarse_mat_is; 3961 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3962 PetscMPIInt all_procs; 3963 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3964 PetscBool compute_vecs = PETSC_FALSE; 3965 PetscScalar *array; 3966 PetscErrorCode ierr; 3967 3968 PetscFunctionBegin; 3969 /* Assign global numbering to coarse dofs */ 3970 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3971 PetscInt ocoarse_size; 3972 compute_vecs = PETSC_TRUE; 3973 ocoarse_size = pcbddc->coarse_size; 3974 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3975 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3976 /* see if we can avoid some work */ 3977 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3978 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 3979 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 3980 PC pc; 3981 PetscBool isbddc; 3982 3983 /* temporary workaround since PCBDDC does not have a reset method so far */ 3984 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 3985 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3986 if (isbddc) { 3987 ierr = PCDestroy(&pc);CHKERRQ(ierr); 3988 } 3989 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3990 coarse_reuse = PETSC_FALSE; 3991 } else { /* we can safely reuse already computed coarse matrix */ 3992 coarse_reuse = PETSC_TRUE; 3993 } 3994 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3995 coarse_reuse = PETSC_FALSE; 3996 } 3997 /* reset any subassembling information */ 3998 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3999 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4000 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4001 coarse_reuse = PETSC_TRUE; 4002 } 4003 4004 /* count "active" (i.e. with positive local size) and "void" processes */ 4005 im_active = !!(pcis->n); 4006 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4007 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4008 void_procs = all_procs-active_procs; 4009 csin_type_simple = PETSC_TRUE; 4010 redist = PETSC_FALSE; 4011 if (pcbddc->current_level && void_procs) { 4012 csin_ml = PETSC_TRUE; 4013 ncoarse_ml = void_procs; 4014 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4015 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4016 csin_ds = PETSC_TRUE; 4017 ncoarse_ds = pcbddc->redistribute_coarse; 4018 redist = PETSC_TRUE; 4019 } else { 4020 csin_ds = PETSC_TRUE; 4021 ncoarse_ds = active_procs; 4022 redist = PETSC_TRUE; 4023 } 4024 } else { 4025 csin_ml = PETSC_FALSE; 4026 ncoarse_ml = all_procs; 4027 if (void_procs) { 4028 csin_ds = PETSC_TRUE; 4029 ncoarse_ds = void_procs; 4030 csin_type_simple = PETSC_FALSE; 4031 } else { 4032 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4033 csin_ds = PETSC_TRUE; 4034 ncoarse_ds = pcbddc->redistribute_coarse; 4035 redist = PETSC_TRUE; 4036 } else { 4037 csin_ds = PETSC_FALSE; 4038 ncoarse_ds = all_procs; 4039 } 4040 } 4041 } 4042 4043 /* 4044 test if we can go multilevel: three conditions must be satisfied: 4045 - we have not exceeded the number of levels requested 4046 - we can actually subassemble the active processes 4047 - we can find a suitable number of MPI processes where we can place the subassembled problem 4048 */ 4049 multilevel_allowed = PETSC_FALSE; 4050 multilevel_requested = PETSC_FALSE; 4051 if (pcbddc->current_level < pcbddc->max_levels) { 4052 multilevel_requested = PETSC_TRUE; 4053 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4054 multilevel_allowed = PETSC_FALSE; 4055 } else { 4056 multilevel_allowed = PETSC_TRUE; 4057 } 4058 } 4059 /* determine number of process partecipating to coarse solver */ 4060 if (multilevel_allowed) { 4061 ncoarse = ncoarse_ml; 4062 csin = csin_ml; 4063 redist = PETSC_FALSE; 4064 } else { 4065 ncoarse = ncoarse_ds; 4066 csin = csin_ds; 4067 } 4068 4069 /* creates temporary l2gmap and IS for coarse indexes */ 4070 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4071 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4072 4073 /* creates temporary MATIS object for coarse matrix */ 4074 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4075 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4076 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4077 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4078 #if 0 4079 { 4080 PetscViewer viewer; 4081 char filename[256]; 4082 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4083 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4084 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4085 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4086 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 4087 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4088 } 4089 #endif 4090 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4091 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4092 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4093 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4094 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4095 4096 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4097 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4098 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4099 const PetscInt *idxs; 4100 ISLocalToGlobalMapping tmap; 4101 4102 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4103 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4104 /* allocate space for temporary storage */ 4105 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4106 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4107 /* allocate for IS array */ 4108 nisdofs = pcbddc->n_ISForDofsLocal; 4109 nisneu = !!pcbddc->NeumannBoundariesLocal; 4110 nis = nisdofs + nisneu; 4111 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4112 /* dofs splitting */ 4113 for (i=0;i<nisdofs;i++) { 4114 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4115 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4116 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4117 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4118 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4119 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4121 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4122 } 4123 /* neumann boundaries */ 4124 if (pcbddc->NeumannBoundariesLocal) { 4125 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4126 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4127 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4128 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4129 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4130 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4131 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4132 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4133 } 4134 /* free memory */ 4135 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4136 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4137 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4138 } else { 4139 nis = 0; 4140 nisdofs = 0; 4141 nisneu = 0; 4142 isarray = NULL; 4143 } 4144 /* destroy no longer needed map */ 4145 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4146 4147 /* restrict on coarse candidates (if needed) */ 4148 coarse_mat_is = NULL; 4149 if (csin) { 4150 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4151 if (redist) { 4152 PetscMPIInt rank; 4153 PetscInt spc,n_spc_p1,dest[1],destsize; 4154 4155 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4156 spc = active_procs/ncoarse; 4157 n_spc_p1 = active_procs%ncoarse; 4158 if (im_active) { 4159 destsize = 1; 4160 if (rank > n_spc_p1*(spc+1)-1) { 4161 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4162 } else { 4163 dest[0] = rank/(spc+1); 4164 } 4165 } else { 4166 destsize = 0; 4167 } 4168 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4169 } else if (csin_type_simple) { 4170 PetscMPIInt rank; 4171 PetscInt issize,isidx; 4172 4173 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4174 if (im_active) { 4175 issize = 1; 4176 isidx = (PetscInt)rank; 4177 } else { 4178 issize = 0; 4179 isidx = -1; 4180 } 4181 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4182 } else { /* get a suitable subassembling pattern from MATIS code */ 4183 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4184 } 4185 4186 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4187 if (!redist || ncoarse <= void_procs) { 4188 PetscInt ncoarse_cand,tissize,*nisindices; 4189 PetscInt *coarse_candidates; 4190 const PetscInt* tisindices; 4191 4192 /* get coarse candidates' ranks in pc communicator */ 4193 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4194 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4195 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4196 if (!coarse_candidates[i]) { 4197 coarse_candidates[ncoarse_cand++]=i; 4198 } 4199 } 4200 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4201 4202 4203 if (pcbddc->dbg_flag) { 4204 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4205 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4206 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4207 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4208 for (i=0;i<ncoarse_cand;i++) { 4209 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4210 } 4211 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4212 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4213 } 4214 /* shift the pattern on coarse candidates */ 4215 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4216 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4217 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4218 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4219 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4220 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4221 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4222 } 4223 if (pcbddc->dbg_flag) { 4224 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4225 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4226 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4227 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4228 } 4229 } 4230 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4231 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4232 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4233 } else { /* this is the last level, so use just receiving processes in subcomm */ 4234 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4235 } 4236 } else { 4237 if (pcbddc->dbg_flag) { 4238 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4239 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4240 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4241 } 4242 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4243 coarse_mat_is = t_coarse_mat_is; 4244 } 4245 4246 /* create local to global scatters for coarse problem */ 4247 if (compute_vecs) { 4248 PetscInt lrows; 4249 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4250 if (coarse_mat_is) { 4251 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4252 } else { 4253 lrows = 0; 4254 } 4255 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4256 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4257 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4258 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4259 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4260 } 4261 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4262 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4263 4264 /* set defaults for coarse KSP and PC */ 4265 if (multilevel_allowed) { 4266 coarse_ksp_type = KSPRICHARDSON; 4267 coarse_pc_type = PCBDDC; 4268 } else { 4269 coarse_ksp_type = KSPPREONLY; 4270 coarse_pc_type = PCREDUNDANT; 4271 } 4272 4273 /* print some info if requested */ 4274 if (pcbddc->dbg_flag) { 4275 if (!multilevel_allowed) { 4276 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4277 if (multilevel_requested) { 4278 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4279 } else if (pcbddc->max_levels) { 4280 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4281 } 4282 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4283 } 4284 } 4285 4286 /* create the coarse KSP object only once with defaults */ 4287 if (coarse_mat_is) { 4288 MatReuse coarse_mat_reuse; 4289 PetscViewer dbg_viewer = NULL; 4290 if (pcbddc->dbg_flag) { 4291 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4292 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4293 } 4294 if (!pcbddc->coarse_ksp) { 4295 char prefix[256],str_level[16]; 4296 size_t len; 4297 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4298 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4299 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4300 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4301 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4302 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4303 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4304 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4305 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4306 /* prefix */ 4307 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4308 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4309 if (!pcbddc->current_level) { 4310 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4311 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4312 } else { 4313 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4314 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4315 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4316 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4317 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4318 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4319 } 4320 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4321 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4322 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4323 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4324 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4325 /* allow user customization */ 4326 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4327 } 4328 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4329 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4330 if (nisdofs) { 4331 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4332 for (i=0;i<nisdofs;i++) { 4333 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4334 } 4335 } 4336 if (nisneu) { 4337 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4338 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4339 } 4340 4341 /* get some info after set from options */ 4342 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4343 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4344 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4345 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4346 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4347 isbddc = PETSC_FALSE; 4348 } 4349 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4350 if (isredundant) { 4351 KSP inner_ksp; 4352 PC inner_pc; 4353 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4354 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4355 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4356 } 4357 4358 /* assemble coarse matrix */ 4359 if (coarse_reuse) { 4360 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4361 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4362 coarse_mat_reuse = MAT_REUSE_MATRIX; 4363 } else { 4364 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4365 } 4366 if (isbddc || isnn) { 4367 if (pcbddc->coarsening_ratio > 1) { 4368 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4369 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4370 if (pcbddc->dbg_flag) { 4371 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4372 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4373 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4374 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4375 } 4376 } 4377 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4378 } else { 4379 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4380 coarse_mat = coarse_mat_is; 4381 } 4382 } else { 4383 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4384 } 4385 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4386 4387 /* propagate symmetry info of coarse matrix */ 4388 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4389 if (pc->pmat->symmetric_set) { 4390 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4391 } 4392 if (pc->pmat->hermitian_set) { 4393 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4394 } 4395 if (pc->pmat->spd_set) { 4396 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4397 } 4398 /* set operators */ 4399 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4400 if (pcbddc->dbg_flag) { 4401 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4402 } 4403 } else { /* processes non partecipating to coarse solver (if any) */ 4404 coarse_mat = 0; 4405 } 4406 ierr = PetscFree(isarray);CHKERRQ(ierr); 4407 #if 0 4408 { 4409 PetscViewer viewer; 4410 char filename[256]; 4411 sprintf(filename,"coarse_mat.m"); 4412 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4413 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4414 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4415 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 4416 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4417 } 4418 #endif 4419 4420 if (pcbddc->coarse_ksp) { 4421 Vec crhs,csol; 4422 PetscBool ispreonly; 4423 4424 /* setup coarse ksp */ 4425 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4426 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4427 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4428 /* hack */ 4429 if (!csol) { 4430 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4431 } 4432 if (!crhs) { 4433 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4434 } 4435 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4436 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4437 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4438 KSP check_ksp; 4439 KSPType check_ksp_type; 4440 PC check_pc; 4441 Vec check_vec,coarse_vec; 4442 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4443 PetscInt its; 4444 PetscBool compute_eigs; 4445 PetscReal *eigs_r,*eigs_c; 4446 PetscInt neigs; 4447 const char *prefix; 4448 4449 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4450 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4451 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4452 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4453 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4454 if (ispreonly) { 4455 check_ksp_type = KSPPREONLY; 4456 compute_eigs = PETSC_FALSE; 4457 } else { 4458 check_ksp_type = KSPGMRES; 4459 compute_eigs = PETSC_TRUE; 4460 } 4461 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4462 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4463 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4464 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4465 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4466 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4467 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4468 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4469 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4470 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4471 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4472 /* create random vec */ 4473 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4474 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4475 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4476 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4477 /* solve coarse problem */ 4478 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4479 /* set eigenvalue estimation if preonly has not been requested */ 4480 if (compute_eigs) { 4481 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4482 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4483 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4484 lambda_max = eigs_r[neigs-1]; 4485 lambda_min = eigs_r[0]; 4486 if (pcbddc->use_coarse_estimates) { 4487 if (lambda_max>lambda_min) { 4488 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4489 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4490 } 4491 } 4492 } 4493 4494 /* check coarse problem residual error */ 4495 if (pcbddc->dbg_flag) { 4496 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4497 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4498 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4499 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4500 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4501 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4502 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4503 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4504 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4505 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4506 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4507 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4508 if (compute_eigs) { 4509 PetscReal lambda_max_s,lambda_min_s; 4510 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4511 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4512 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4513 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4514 for (i=0;i<neigs;i++) { 4515 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4516 } 4517 } 4518 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4519 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4520 } 4521 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4522 if (compute_eigs) { 4523 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4524 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4525 } 4526 } 4527 } 4528 /* print additional info */ 4529 if (pcbddc->dbg_flag) { 4530 /* waits until all processes reaches this point */ 4531 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4532 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4533 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4534 } 4535 4536 /* free memory */ 4537 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4538 PetscFunctionReturn(0); 4539 } 4540 4541 #undef __FUNCT__ 4542 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4543 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4544 { 4545 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4546 PC_IS* pcis = (PC_IS*)pc->data; 4547 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4548 IS subset,subset_mult,subset_n; 4549 PetscInt local_size,coarse_size=0; 4550 PetscInt *local_primal_indices=NULL; 4551 const PetscInt *t_local_primal_indices; 4552 PetscErrorCode ierr; 4553 4554 PetscFunctionBegin; 4555 /* Compute global number of coarse dofs */ 4556 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4557 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4558 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4559 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4560 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4561 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4562 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4563 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4564 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4565 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 4566 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4567 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4568 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4569 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4570 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4571 4572 /* check numbering */ 4573 if (pcbddc->dbg_flag) { 4574 PetscScalar coarsesum,*array; 4575 PetscInt i; 4576 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4577 4578 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4579 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4580 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4581 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4582 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4583 for (i=0;i<pcbddc->local_primal_size;i++) { 4584 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4585 } 4586 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4587 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4588 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4589 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4590 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4591 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4592 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4593 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4594 for (i=0;i<pcis->n;i++) { 4595 if (array[i] == 1.0) { 4596 set_error = PETSC_TRUE; 4597 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4598 } 4599 } 4600 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4601 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4602 for (i=0;i<pcis->n;i++) { 4603 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4604 } 4605 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4606 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4607 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4608 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4609 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4610 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4611 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4612 PetscInt *gidxs; 4613 4614 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4615 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4616 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4617 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4618 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4619 for (i=0;i<pcbddc->local_primal_size;i++) { 4620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4621 } 4622 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4623 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4624 } 4625 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4626 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4627 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4628 } 4629 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4630 /* get back data */ 4631 *coarse_size_n = coarse_size; 4632 *local_primal_indices_n = local_primal_indices; 4633 PetscFunctionReturn(0); 4634 } 4635 4636 #undef __FUNCT__ 4637 #define __FUNCT__ "PCBDDCGlobalToLocal" 4638 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4639 { 4640 IS localis_t; 4641 PetscInt i,lsize,*idxs,n; 4642 PetscScalar *vals; 4643 PetscErrorCode ierr; 4644 4645 PetscFunctionBegin; 4646 /* get indices in local ordering exploiting local to global map */ 4647 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4648 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4649 for (i=0;i<lsize;i++) vals[i] = 1.0; 4650 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4651 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4652 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4653 if (idxs) { /* multilevel guard */ 4654 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4655 } 4656 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4657 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4658 ierr = PetscFree(vals);CHKERRQ(ierr); 4659 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4660 /* now compute set in local ordering */ 4661 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4662 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4663 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4664 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4665 for (i=0,lsize=0;i<n;i++) { 4666 if (PetscRealPart(vals[i]) > 0.5) { 4667 lsize++; 4668 } 4669 } 4670 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4671 for (i=0,lsize=0;i<n;i++) { 4672 if (PetscRealPart(vals[i]) > 0.5) { 4673 idxs[lsize++] = i; 4674 } 4675 } 4676 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4677 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4678 *localis = localis_t; 4679 PetscFunctionReturn(0); 4680 } 4681 4682 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4683 #undef __FUNCT__ 4684 #define __FUNCT__ "PCBDDCMatMult_Private" 4685 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4686 { 4687 PCBDDCChange_ctx change_ctx; 4688 PetscErrorCode ierr; 4689 4690 PetscFunctionBegin; 4691 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4692 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4693 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4694 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4695 PetscFunctionReturn(0); 4696 } 4697 4698 #undef __FUNCT__ 4699 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4700 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4701 { 4702 PCBDDCChange_ctx change_ctx; 4703 PetscErrorCode ierr; 4704 4705 PetscFunctionBegin; 4706 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4707 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4708 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4709 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4710 PetscFunctionReturn(0); 4711 } 4712 4713 #undef __FUNCT__ 4714 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4715 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4716 { 4717 PC_IS *pcis=(PC_IS*)pc->data; 4718 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4719 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4720 Mat S_j; 4721 PetscInt *used_xadj,*used_adjncy; 4722 PetscBool free_used_adj; 4723 PetscErrorCode ierr; 4724 4725 PetscFunctionBegin; 4726 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4727 free_used_adj = PETSC_FALSE; 4728 if (pcbddc->sub_schurs_layers == -1) { 4729 used_xadj = NULL; 4730 used_adjncy = NULL; 4731 } else { 4732 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4733 used_xadj = pcbddc->mat_graph->xadj; 4734 used_adjncy = pcbddc->mat_graph->adjncy; 4735 } else if (pcbddc->computed_rowadj) { 4736 used_xadj = pcbddc->mat_graph->xadj; 4737 used_adjncy = pcbddc->mat_graph->adjncy; 4738 } else { 4739 PetscBool flg_row=PETSC_FALSE; 4740 const PetscInt *xadj,*adjncy; 4741 PetscInt nvtxs; 4742 4743 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4744 if (flg_row) { 4745 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4746 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4747 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4748 free_used_adj = PETSC_TRUE; 4749 } else { 4750 pcbddc->sub_schurs_layers = -1; 4751 used_xadj = NULL; 4752 used_adjncy = NULL; 4753 } 4754 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4755 } 4756 } 4757 4758 /* setup sub_schurs data */ 4759 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4760 if (!sub_schurs->use_mumps) { 4761 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4762 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4763 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4764 } else { 4765 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4766 PetscBool isseqaij; 4767 if (!pcbddc->use_vertices && reuse_solvers) { 4768 PetscInt n_vertices; 4769 4770 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4771 reuse_solvers = (PetscBool)!n_vertices; 4772 } 4773 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4774 if (!isseqaij) { 4775 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4776 if (matis->A == pcbddc->local_mat) { 4777 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4778 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4779 } else { 4780 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4781 } 4782 } 4783 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4784 } 4785 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4786 4787 /* free adjacency */ 4788 if (free_used_adj) { 4789 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4790 } 4791 PetscFunctionReturn(0); 4792 } 4793 4794 #undef __FUNCT__ 4795 #define __FUNCT__ "PCBDDCInitSubSchurs" 4796 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4797 { 4798 PC_IS *pcis=(PC_IS*)pc->data; 4799 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4800 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4801 PCBDDCGraph graph; 4802 PetscErrorCode ierr; 4803 4804 PetscFunctionBegin; 4805 /* attach interface graph for determining subsets */ 4806 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4807 IS verticesIS,verticescomm; 4808 PetscInt vsize,*idxs; 4809 4810 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4811 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4812 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4813 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4814 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4815 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4816 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4817 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4818 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4819 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4820 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4821 /* 4822 if (pcbddc->dbg_flag) { 4823 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4824 } 4825 */ 4826 } else { 4827 graph = pcbddc->mat_graph; 4828 } 4829 4830 /* sub_schurs init */ 4831 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4832 4833 /* free graph struct */ 4834 if (pcbddc->sub_schurs_rebuild) { 4835 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4836 } 4837 PetscFunctionReturn(0); 4838 } 4839