1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 PetscInt subset_size; 51 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 56 /* min/max and threshold */ 57 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 58 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 59 nmax = PetscMax(nmin,nmax); 60 allocated_S_St = PETSC_FALSE; 61 if (nmin) { 62 allocated_S_St = PETSC_TRUE; 63 } 64 65 /* allocate lapack workspace */ 66 cum = cum2 = 0; 67 maxneigs = 0; 68 for (i=0;i<sub_schurs->n_subs;i++) { 69 PetscInt n,subset_size; 70 71 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 72 n = PetscMin(subset_size,nmax); 73 cum += subset_size; 74 cum2 += subset_size*n; 75 maxneigs = PetscMax(maxneigs,n); 76 } 77 if (mss) { 78 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 79 PetscBLASInt B_itype = 1; 80 PetscBLASInt B_N = mss; 81 PetscReal zero = 0.0; 82 PetscReal eps = 0.0; /* dlamch? */ 83 84 B_lwork = -1; 85 S = NULL; 86 St = NULL; 87 eigs = NULL; 88 eigv = NULL; 89 B_iwork = NULL; 90 B_ifail = NULL; 91 #if defined(PETSC_USE_COMPLEX) 92 rwork = NULL; 93 #endif 94 thresh = 1.0; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 112 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 if (allocated_S_St) { 116 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 117 } 118 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 119 #if defined(PETSC_USE_COMPLEX) 120 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 121 #endif 122 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 124 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 125 nv+cum,&pcbddc->adaptive_constraints_idxs, 126 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 127 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 maxneigs = 0; 130 cum = cum2 = cumarray = 0; 131 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 132 pcbddc->adaptive_constraints_data_ptr[0] = 0; 133 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 134 const PetscInt *idxs; 135 136 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 137 for (cum=0;cum<nv;cum++) { 138 pcbddc->adaptive_constraints_n[cum] = 1; 139 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 140 pcbddc->adaptive_constraints_data[cum] = 1.0; 141 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 142 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 143 } 144 cum2 = cum; 145 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 146 } 147 148 if (mss) { /* multilevel */ 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 150 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 151 } 152 153 for (i=0;i<sub_schurs->n_subs;i++) { 154 155 const PetscInt *idxs; 156 PetscReal infty = PETSC_MAX_REAL; 157 PetscInt j,subset_size,eigs_start = 0; 158 PetscBLASInt B_N; 159 PetscBool same_data = PETSC_FALSE; 160 161 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 162 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 163 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 164 if (sub_schurs->is_hermitian) { 165 PetscInt j,k; 166 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 167 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 168 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 169 } 170 for (j=0;j<subset_size;j++) { 171 for (k=j;k<subset_size;k++) { 172 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 173 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 174 } 175 } 176 } else { 177 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 178 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 179 } 180 } else { 181 S = Sarray + cumarray; 182 St = Starray + cumarray; 183 } 184 185 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 186 /* see if we can save some work */ 187 if (sub_schurs->n_subs == 1) { 188 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 189 } 190 191 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 192 B_neigs = 0; 193 } else { 194 /* Threshold: this is an heuristic for edges */ 195 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 196 197 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 198 PetscBLASInt B_itype = 1; 199 PetscBLASInt B_IL, B_IU; 200 PetscReal eps = -1.0; /* dlamch? */ 201 PetscInt nmin_s; 202 203 /* ask for eigenvalues larger than thresh */ 204 if (pcbddc->dbg_flag) { 205 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 206 } 207 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 208 #if defined(PETSC_USE_COMPLEX) 209 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 210 #else 211 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 212 #endif 213 ierr = PetscFPTrapPop();CHKERRQ(ierr); 214 if (B_ierr) { 215 if (B_ierr < 0 ) { 216 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 217 } else if (B_ierr <= B_N) { 218 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 219 } else { 220 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 221 } 222 } 223 224 if (B_neigs > nmax) { 225 if (pcbddc->dbg_flag) { 226 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 227 } 228 eigs_start = B_neigs -nmax; 229 B_neigs = nmax; 230 } 231 232 nmin_s = PetscMin(nmin,B_N); 233 if (B_neigs < nmin_s) { 234 PetscBLASInt B_neigs2; 235 236 B_IU = B_N - B_neigs; 237 B_IL = B_N - nmin_s + 1; 238 if (pcbddc->dbg_flag) { 239 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 240 } 241 if (sub_schurs->is_hermitian) { 242 PetscInt j; 243 for (j=0;j<subset_size;j++) { 244 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 245 } 246 for (j=0;j<subset_size;j++) { 247 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 248 } 249 } else { 250 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 251 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 252 } 253 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 254 #if defined(PETSC_USE_COMPLEX) 255 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 256 #else 257 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 258 #endif 259 ierr = PetscFPTrapPop();CHKERRQ(ierr); 260 B_neigs += B_neigs2; 261 } 262 if (B_ierr) { 263 if (B_ierr < 0 ) { 264 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 265 } else if (B_ierr <= B_N) { 266 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 267 } else { 268 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 269 } 270 } 271 if (pcbddc->dbg_flag) { 272 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 273 for (j=0;j<B_neigs;j++) { 274 if (eigs[j] == 0.0) { 275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 276 } else { 277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 278 } 279 } 280 } 281 } else { 282 /* TODO */ 283 } 284 } 285 maxneigs = PetscMax(B_neigs,maxneigs); 286 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 287 if (B_neigs) { 288 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 289 290 if (pcbddc->dbg_flag > 1) { 291 PetscInt ii; 292 for (ii=0;ii<B_neigs;ii++) { 293 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 294 for (j=0;j<B_N;j++) { 295 #if defined(PETSC_USE_COMPLEX) 296 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 297 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 298 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 299 #else 300 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 301 #endif 302 } 303 } 304 } 305 #if 0 306 for (j=0;j<B_neigs;j++) { 307 PetscBLASInt Blas_N,Blas_one = 1.0; 308 PetscScalar norm; 309 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 310 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 311 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 312 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 313 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 314 } else { 315 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 316 } 317 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 318 } 319 #endif 320 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 321 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 322 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 323 cum++; 324 } 325 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 326 /* shift for next computation */ 327 cumarray += subset_size*subset_size; 328 } 329 if (pcbddc->dbg_flag) { 330 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 331 } 332 333 if (mss) { 334 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 335 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 336 /* destroy matrices (junk) */ 337 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 338 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 339 } 340 if (allocated_S_St) { 341 ierr = PetscFree2(S,St);CHKERRQ(ierr); 342 } 343 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 344 #if defined(PETSC_USE_COMPLEX) 345 ierr = PetscFree(rwork);CHKERRQ(ierr); 346 #endif 347 if (pcbddc->dbg_flag) { 348 PetscInt maxneigs_r; 349 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 350 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 351 } 352 PetscFunctionReturn(0); 353 } 354 355 #undef __FUNCT__ 356 #define __FUNCT__ "PCBDDCSetUpSolvers" 357 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 358 { 359 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 360 PetscScalar *coarse_submat_vals; 361 PetscErrorCode ierr; 362 363 PetscFunctionBegin; 364 /* Setup local scatters R_to_B and (optionally) R_to_D */ 365 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 366 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 367 368 /* Setup local neumann solver ksp_R */ 369 /* PCBDDCSetUpLocalScatters should be called first! */ 370 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 371 372 /* Change global null space passed in by the user if change of basis has been requested */ 373 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 374 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 375 } 376 377 /* 378 Setup local correction and local part of coarse basis. 379 Gives back the dense local part of the coarse matrix in column major ordering 380 */ 381 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 382 383 /* Compute total number of coarse nodes and setup coarse solver */ 384 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 385 386 /* free */ 387 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "PCBDDCResetCustomization" 393 PetscErrorCode PCBDDCResetCustomization(PC pc) 394 { 395 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 396 PetscErrorCode ierr; 397 398 PetscFunctionBegin; 399 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 400 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 401 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 402 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 403 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 404 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 405 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 406 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 407 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 408 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 409 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "PCBDDCResetTopography" 415 PetscErrorCode PCBDDCResetTopography(PC pc) 416 { 417 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 418 PetscErrorCode ierr; 419 420 PetscFunctionBegin; 421 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 422 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 423 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 424 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 425 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "PCBDDCResetSolvers" 431 PetscErrorCode PCBDDCResetSolvers(PC pc) 432 { 433 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 434 PetscScalar *array; 435 PetscErrorCode ierr; 436 437 PetscFunctionBegin; 438 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 439 if (pcbddc->coarse_phi_B) { 440 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 441 ierr = PetscFree(array);CHKERRQ(ierr); 442 } 443 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 444 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 445 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 446 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 447 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 449 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 450 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 451 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 452 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 453 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 454 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 455 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 456 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 457 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 458 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 459 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 460 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 461 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 462 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 463 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 464 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 465 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 466 PetscFunctionReturn(0); 467 } 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 471 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 472 { 473 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 474 PC_IS *pcis = (PC_IS*)pc->data; 475 VecType impVecType; 476 PetscInt n_constraints,n_R,old_size; 477 PetscErrorCode ierr; 478 479 PetscFunctionBegin; 480 if (!pcbddc->ConstraintMatrix) { 481 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 482 } 483 /* get sizes */ 484 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 485 n_R = pcis->n-pcbddc->n_vertices; 486 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 487 /* local work vectors (try to avoid unneeded work)*/ 488 /* R nodes */ 489 old_size = -1; 490 if (pcbddc->vec1_R) { 491 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 492 } 493 if (n_R != old_size) { 494 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 495 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 496 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 497 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 498 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 499 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 500 } 501 /* local primal dofs */ 502 old_size = -1; 503 if (pcbddc->vec1_P) { 504 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 505 } 506 if (pcbddc->local_primal_size != old_size) { 507 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 508 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 509 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 510 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 511 } 512 /* local explicit constraints */ 513 old_size = -1; 514 if (pcbddc->vec1_C) { 515 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 516 } 517 if (n_constraints && n_constraints != old_size) { 518 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 519 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 520 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 521 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 522 } 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "PCBDDCSetUpCorrection" 528 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 529 { 530 PetscErrorCode ierr; 531 /* pointers to pcis and pcbddc */ 532 PC_IS* pcis = (PC_IS*)pc->data; 533 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 534 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 535 /* submatrices of local problem */ 536 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 537 /* submatrices of local coarse problem */ 538 Mat S_VV,S_CV,S_VC,S_CC; 539 /* working matrices */ 540 Mat C_CR; 541 /* additional working stuff */ 542 PC pc_R; 543 Mat F; 544 PetscBool isLU,isCHOL,isILU; 545 546 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 547 PetscScalar *work; 548 PetscInt *idx_V_B; 549 PetscInt n,n_vertices,n_constraints; 550 PetscInt i,n_R,n_D,n_B; 551 PetscBool unsymmetric_check; 552 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 553 MatType impMatType; 554 /* some shortcuts to scalars */ 555 PetscScalar one=1.0,m_one=-1.0; 556 557 PetscFunctionBegin; 558 n_vertices = pcbddc->n_vertices; 559 n_constraints = pcbddc->local_primal_size-n_vertices; 560 /* Set Non-overlapping dimensions */ 561 n_B = pcis->n_B; 562 n_D = pcis->n - n_B; 563 n_R = pcis->n - n_vertices; 564 565 /* Set types for local objects needed by BDDC precondtioner */ 566 impMatType = MATSEQDENSE; 567 568 /* vertices in boundary numbering */ 569 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 570 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 571 if (i != n_vertices) { 572 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 573 } 574 575 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 576 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 577 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 578 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 579 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 580 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 581 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 582 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 583 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 584 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 585 586 unsymmetric_check = PETSC_FALSE; 587 /* allocate workspace */ 588 n = 0; 589 if (n_constraints) { 590 n += n_R*n_constraints; 591 } 592 if (n_vertices) { 593 n = PetscMax(2*n_R*n_vertices,n); 594 n = PetscMax((n_R+n_B)*n_vertices,n); 595 } 596 if (!pcbddc->symmetric_primal) { 597 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 598 unsymmetric_check = PETSC_TRUE; 599 } 600 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 601 602 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 603 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 604 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 605 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 606 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 607 if (isLU || isILU || isCHOL) { 608 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 609 } else if (sub_schurs->reuse_mumps) { 610 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 611 MatFactorType type; 612 613 F = reuse_mumps->F; 614 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 615 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 616 } else { 617 F = NULL; 618 } 619 620 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 621 if (n_constraints) { 622 Mat M1,M2,M3; 623 Mat auxmat; 624 IS is_aux; 625 PetscScalar *array,*array2; 626 627 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 628 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 629 630 /* Extract constraints on R nodes: C_{CR} */ 631 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 632 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 633 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 634 635 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 636 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 637 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 638 for (i=0;i<n_constraints;i++) { 639 const PetscScalar *row_cmat_values; 640 const PetscInt *row_cmat_indices; 641 PetscInt size_of_constraint,j; 642 643 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 644 for (j=0;j<size_of_constraint;j++) { 645 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 646 } 647 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 648 } 649 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 650 if (F) { 651 Mat B; 652 653 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 654 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 655 ierr = MatDestroy(&B);CHKERRQ(ierr); 656 } else { 657 PetscScalar *marr; 658 659 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 660 for (i=0;i<n_constraints;i++) { 661 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 662 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 663 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 664 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 665 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 666 } 667 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 668 } 669 if (!pcbddc->switch_static) { 670 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 671 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 672 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 673 for (i=0;i<n_constraints;i++) { 674 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 675 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 676 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 677 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 678 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 679 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 680 } 681 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 682 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 683 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 684 } else { 685 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 686 pcbddc->local_auxmat2 = local_auxmat2_R; 687 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 688 } 689 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 690 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 691 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 692 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 693 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 694 if (isCHOL) { 695 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 696 } else { 697 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 698 } 699 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 700 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 701 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 702 ierr = MatDestroy(&M2);CHKERRQ(ierr); 703 ierr = MatDestroy(&M3);CHKERRQ(ierr); 704 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 705 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 706 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 707 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 708 ierr = MatDestroy(&M1);CHKERRQ(ierr); 709 } 710 /* Get submatrices from subdomain matrix */ 711 if (n_vertices) { 712 IS is_aux; 713 714 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 715 IS tis; 716 717 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 718 ierr = ISSort(tis);CHKERRQ(ierr); 719 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 720 ierr = ISDestroy(&tis);CHKERRQ(ierr); 721 } else { 722 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 723 } 724 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 725 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 726 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 727 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 728 } 729 730 /* Matrix of coarse basis functions (local) */ 731 if (pcbddc->coarse_phi_B) { 732 PetscInt on_B,on_primal,on_D=n_D; 733 if (pcbddc->coarse_phi_D) { 734 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 735 } 736 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 737 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 738 PetscScalar *marray; 739 740 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 741 ierr = PetscFree(marray);CHKERRQ(ierr); 742 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 743 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 744 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 745 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 746 } 747 } 748 749 if (!pcbddc->coarse_phi_B) { 750 PetscScalar *marray; 751 752 n = n_B*pcbddc->local_primal_size; 753 if (pcbddc->switch_static || pcbddc->dbg_flag) { 754 n += n_D*pcbddc->local_primal_size; 755 } 756 if (!pcbddc->symmetric_primal) { 757 n *= 2; 758 } 759 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 760 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 761 n = n_B*pcbddc->local_primal_size; 762 if (pcbddc->switch_static || pcbddc->dbg_flag) { 763 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 764 n += n_D*pcbddc->local_primal_size; 765 } 766 if (!pcbddc->symmetric_primal) { 767 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 768 if (pcbddc->switch_static || pcbddc->dbg_flag) { 769 n = n_B*pcbddc->local_primal_size; 770 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 771 } 772 } else { 773 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 774 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 775 if (pcbddc->switch_static || pcbddc->dbg_flag) { 776 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 777 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 778 } 779 } 780 } 781 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 782 /* vertices */ 783 if (n_vertices) { 784 785 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 786 787 if (n_R) { 788 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 789 PetscBLASInt B_N,B_one = 1; 790 PetscScalar *x,*y; 791 PetscBool isseqaij; 792 793 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 794 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 795 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 796 if (F) { /* TODO could be optimized for symmetric problems */ 797 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 798 } else { 799 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 800 for (i=0;i<n_vertices;i++) { 801 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 802 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 803 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 804 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 805 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 806 } 807 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 808 } 809 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 810 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 811 if (n_constraints) { 812 Mat B; 813 814 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 815 for (i=0;i<n_vertices;i++) { 816 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 817 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 818 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 819 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 820 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 821 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 822 } 823 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 824 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 825 ierr = MatDestroy(&B);CHKERRQ(ierr); 826 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 827 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 828 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 829 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 830 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 831 ierr = MatDestroy(&B);CHKERRQ(ierr); 832 } 833 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 834 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 835 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 836 } 837 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 838 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 839 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 840 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 841 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 842 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 843 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 844 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 845 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 846 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 847 } else { 848 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 849 } 850 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 851 /* coarse basis functions */ 852 for (i=0;i<n_vertices;i++) { 853 PetscScalar *y; 854 855 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 856 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 857 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 858 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 y[n_B*i+idx_V_B[i]] = 1.0; 861 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 862 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 863 864 if (pcbddc->switch_static || pcbddc->dbg_flag) { 865 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 866 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 867 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 870 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 871 } 872 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 873 } 874 /* if n_R == 0 the object is not destroyed */ 875 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 876 } 877 878 if (n_constraints) { 879 Mat B; 880 881 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 882 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 883 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 884 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 885 if (n_vertices) { 886 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 887 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 888 } else { 889 Mat S_VCt; 890 891 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 892 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 893 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 894 } 895 } 896 ierr = MatDestroy(&B);CHKERRQ(ierr); 897 /* coarse basis functions */ 898 for (i=0;i<n_constraints;i++) { 899 PetscScalar *y; 900 901 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 902 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 903 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 904 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 906 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 907 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 908 if (pcbddc->switch_static || pcbddc->dbg_flag) { 909 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 910 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 911 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 912 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 913 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 914 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 915 } 916 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 917 } 918 } 919 if (n_constraints) { 920 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 921 } 922 923 /* compute other basis functions for non-symmetric problems */ 924 if (!pcbddc->symmetric_primal) { 925 926 if (n_constraints) { 927 Mat S_CCT,B_C; 928 929 /* this is a lazy thing */ 930 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 931 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 932 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 933 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 934 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 935 if (n_vertices) { 936 Mat B_V,S_VCT; 937 938 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 939 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 940 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 941 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 942 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 943 } 944 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 945 } else { /* if there are no constraints, reset work */ 946 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 947 } 948 if (n_vertices && n_R) { 949 Mat A_VRT; 950 PetscScalar *marray; 951 PetscBLASInt B_N,B_one = 1; 952 953 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 954 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 955 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 956 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 957 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 958 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 959 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 960 } 961 962 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 963 for (i=0;i<pcbddc->local_primal_size;i++) { 964 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 965 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 966 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 967 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 968 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 969 } 970 } else { 971 for (i=0;i<pcbddc->local_primal_size;i++) { 972 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 973 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 974 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 975 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 976 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 977 } 978 } 979 /* coarse basis functions */ 980 for (i=0;i<pcbddc->local_primal_size;i++) { 981 PetscScalar *y; 982 983 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 984 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 985 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 986 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 987 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 988 if (i<n_vertices) { 989 y[n_B*i+idx_V_B[i]] = 1.0; 990 } 991 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 992 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 993 994 if (pcbddc->switch_static || pcbddc->dbg_flag) { 995 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 996 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 997 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 998 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 999 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1000 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1001 } 1002 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1003 } 1004 } 1005 /* free memory */ 1006 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1007 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1008 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1009 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1010 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1011 ierr = PetscFree(work);CHKERRQ(ierr); 1012 if (n_vertices) { 1013 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1014 } 1015 if (n_constraints) { 1016 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1017 } 1018 /* Checking coarse_sub_mat and coarse basis functios */ 1019 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1020 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1021 if (pcbddc->dbg_flag) { 1022 Mat coarse_sub_mat; 1023 Mat AUXMAT,TM1,TM2,TM3,TM4; 1024 Mat coarse_phi_D,coarse_phi_B; 1025 Mat coarse_psi_D,coarse_psi_B; 1026 Mat A_II,A_BB,A_IB,A_BI; 1027 Mat C_B,CPHI; 1028 IS is_dummy; 1029 Vec mones; 1030 MatType checkmattype=MATSEQAIJ; 1031 PetscReal real_value; 1032 1033 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1034 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1035 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1036 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1037 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1038 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1039 if (unsymmetric_check) { 1040 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1041 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1042 } 1043 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1044 1045 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1046 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1047 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1048 if (unsymmetric_check) { 1049 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1050 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1051 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1052 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1053 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1054 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1055 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1056 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1057 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1058 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1059 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1060 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1061 } else { 1062 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1063 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1064 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1065 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1066 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1067 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1068 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1069 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1070 } 1071 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1072 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1073 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1074 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1075 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1076 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1077 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1078 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1079 1080 /* check constraints */ 1081 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1082 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1083 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1084 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1085 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1086 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1087 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1088 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1089 if (unsymmetric_check) { 1090 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1091 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1092 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1093 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1094 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1095 } 1096 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1097 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1098 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1099 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1100 1101 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1102 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1103 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1104 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1105 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1106 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1107 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1108 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1109 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1110 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1111 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1112 if (unsymmetric_check) { 1113 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1114 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1115 } 1116 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1117 } 1118 /* get back data */ 1119 *coarse_submat_vals_n = coarse_submat_vals; 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1125 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1126 { 1127 Mat *work_mat; 1128 IS isrow_s,iscol_s; 1129 PetscBool rsorted,csorted; 1130 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1131 PetscErrorCode ierr; 1132 1133 PetscFunctionBegin; 1134 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1135 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1136 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1137 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1138 1139 if (!rsorted) { 1140 const PetscInt *idxs; 1141 PetscInt *idxs_sorted,i; 1142 1143 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1144 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1145 for (i=0;i<rsize;i++) { 1146 idxs_perm_r[i] = i; 1147 } 1148 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1149 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1150 for (i=0;i<rsize;i++) { 1151 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1152 } 1153 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1154 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1155 } else { 1156 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1157 isrow_s = isrow; 1158 } 1159 1160 if (!csorted) { 1161 if (isrow == iscol) { 1162 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1163 iscol_s = isrow_s; 1164 } else { 1165 const PetscInt *idxs; 1166 PetscInt *idxs_sorted,i; 1167 1168 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1169 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1170 for (i=0;i<csize;i++) { 1171 idxs_perm_c[i] = i; 1172 } 1173 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1174 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1175 for (i=0;i<csize;i++) { 1176 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1177 } 1178 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1179 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1180 } 1181 } else { 1182 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1183 iscol_s = iscol; 1184 } 1185 1186 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1187 1188 if (!rsorted || !csorted) { 1189 Mat new_mat; 1190 IS is_perm_r,is_perm_c; 1191 1192 if (!rsorted) { 1193 PetscInt *idxs_r,i; 1194 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1195 for (i=0;i<rsize;i++) { 1196 idxs_r[idxs_perm_r[i]] = i; 1197 } 1198 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1199 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1200 } else { 1201 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1202 } 1203 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1204 1205 if (!csorted) { 1206 if (isrow_s == iscol_s) { 1207 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1208 is_perm_c = is_perm_r; 1209 } else { 1210 PetscInt *idxs_c,i; 1211 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1212 for (i=0;i<csize;i++) { 1213 idxs_c[idxs_perm_c[i]] = i; 1214 } 1215 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1216 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1217 } 1218 } else { 1219 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1220 } 1221 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1222 1223 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1224 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1225 work_mat[0] = new_mat; 1226 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1227 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1228 } 1229 1230 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1231 *B = work_mat[0]; 1232 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1233 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1234 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 #undef __FUNCT__ 1239 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1240 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1241 { 1242 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1243 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1244 Mat new_mat; 1245 IS is_local,is_global; 1246 PetscInt local_size; 1247 PetscBool isseqaij; 1248 PetscErrorCode ierr; 1249 1250 PetscFunctionBegin; 1251 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1252 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1253 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1254 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1255 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1256 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1257 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1258 1259 /* check */ 1260 if (pcbddc->dbg_flag) { 1261 Vec x,x_change; 1262 PetscReal error; 1263 1264 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1265 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1266 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1267 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1268 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1270 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1271 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1272 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1273 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1274 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1275 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1276 ierr = VecDestroy(&x);CHKERRQ(ierr); 1277 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1278 } 1279 1280 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1281 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1282 if (isseqaij) { 1283 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1284 } else { 1285 Mat work_mat; 1286 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1287 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1288 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1289 } 1290 if (matis->A->symmetric_set) { 1291 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1292 #if !defined(PETSC_USE_COMPLEX) 1293 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1294 #endif 1295 } 1296 /* 1297 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1298 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1299 */ 1300 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1306 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1307 { 1308 PC_IS* pcis = (PC_IS*)(pc->data); 1309 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1310 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1311 PetscInt *idx_R_local=NULL; 1312 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1313 PetscInt vbs,bs; 1314 PetscBT bitmask=NULL; 1315 PetscErrorCode ierr; 1316 1317 PetscFunctionBegin; 1318 /* 1319 No need to setup local scatters if 1320 - primal space is unchanged 1321 AND 1322 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1323 AND 1324 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1325 */ 1326 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1327 PetscFunctionReturn(0); 1328 } 1329 /* destroy old objects */ 1330 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1331 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1332 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1333 /* Set Non-overlapping dimensions */ 1334 n_B = pcis->n_B; 1335 n_D = pcis->n - n_B; 1336 n_vertices = pcbddc->n_vertices; 1337 1338 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1339 1340 /* create auxiliary bitmask and allocate workspace */ 1341 if (!sub_schurs->reuse_mumps) { 1342 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1343 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1344 for (i=0;i<n_vertices;i++) { 1345 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1346 } 1347 1348 for (i=0, n_R=0; i<pcis->n; i++) { 1349 if (!PetscBTLookup(bitmask,i)) { 1350 idx_R_local[n_R++] = i; 1351 } 1352 } 1353 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1354 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1355 1356 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1357 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1358 } 1359 1360 /* Block code */ 1361 vbs = 1; 1362 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1363 if (bs>1 && !(n_vertices%bs)) { 1364 PetscBool is_blocked = PETSC_TRUE; 1365 PetscInt *vary; 1366 if (!sub_schurs->reuse_mumps) { 1367 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1368 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1369 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1370 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1371 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1372 for (i=0; i<pcis->n/bs; i++) { 1373 if (vary[i]!=0 && vary[i]!=bs) { 1374 is_blocked = PETSC_FALSE; 1375 break; 1376 } 1377 } 1378 ierr = PetscFree(vary);CHKERRQ(ierr); 1379 } else { 1380 /* Verify directly the R set */ 1381 for (i=0; i<n_R/bs; i++) { 1382 PetscInt j,node=idx_R_local[bs*i]; 1383 for (j=1; j<bs; j++) { 1384 if (node != idx_R_local[bs*i+j]-j) { 1385 is_blocked = PETSC_FALSE; 1386 break; 1387 } 1388 } 1389 } 1390 } 1391 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1392 vbs = bs; 1393 for (i=0;i<n_R/vbs;i++) { 1394 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1395 } 1396 } 1397 } 1398 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1399 if (sub_schurs->reuse_mumps) { 1400 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1401 1402 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1403 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1404 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1405 reuse_mumps->is_R = pcbddc->is_R_local; 1406 } else { 1407 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1408 } 1409 1410 /* print some info if requested */ 1411 if (pcbddc->dbg_flag) { 1412 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1413 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1414 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1415 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1416 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1418 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1419 } 1420 1421 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1422 if (!sub_schurs->reuse_mumps) { 1423 IS is_aux1,is_aux2; 1424 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1425 1426 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1427 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1428 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1429 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1430 for (i=0; i<n_D; i++) { 1431 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1432 } 1433 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1434 for (i=0, j=0; i<n_R; i++) { 1435 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1436 aux_array1[j++] = i; 1437 } 1438 } 1439 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1440 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1441 for (i=0, j=0; i<n_B; i++) { 1442 if (!PetscBTLookup(bitmask,is_indices[i])) { 1443 aux_array2[j++] = i; 1444 } 1445 } 1446 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1447 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1448 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1449 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1450 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1451 1452 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1453 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1454 for (i=0, j=0; i<n_R; i++) { 1455 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1456 aux_array1[j++] = i; 1457 } 1458 } 1459 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1460 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1461 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1462 } 1463 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1464 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1465 } else { 1466 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1467 IS tis; 1468 PetscInt schur_size; 1469 1470 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1471 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1472 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1473 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1474 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1475 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1476 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1477 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1478 } 1479 } 1480 PetscFunctionReturn(0); 1481 } 1482 1483 1484 #undef __FUNCT__ 1485 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1486 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1487 { 1488 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1489 PC_IS *pcis = (PC_IS*)pc->data; 1490 PC pc_temp; 1491 Mat A_RR; 1492 MatReuse reuse; 1493 PetscScalar m_one = -1.0; 1494 PetscReal value; 1495 PetscInt n_D,n_R; 1496 PetscBool use_exact,use_exact_reduced,issbaij; 1497 PetscErrorCode ierr; 1498 /* prefixes stuff */ 1499 char dir_prefix[256],neu_prefix[256],str_level[16]; 1500 size_t len; 1501 1502 PetscFunctionBegin; 1503 1504 /* compute prefixes */ 1505 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1506 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1507 if (!pcbddc->current_level) { 1508 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1509 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1510 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1511 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1512 } else { 1513 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1514 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1515 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1516 len -= 15; /* remove "pc_bddc_coarse_" */ 1517 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1518 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1519 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1520 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1521 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1522 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1523 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1524 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1525 } 1526 1527 /* DIRICHLET PROBLEM */ 1528 if (dirichlet) { 1529 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1530 if (pcbddc->local_mat->symmetric_set) { 1531 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1532 } 1533 /* Matrix for Dirichlet problem is pcis->A_II */ 1534 n_D = pcis->n - pcis->n_B; 1535 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1536 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1537 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1538 /* default */ 1539 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1540 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1541 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1542 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1543 if (issbaij) { 1544 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1545 } else { 1546 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1547 } 1548 /* Allow user's customization */ 1549 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1550 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1551 } 1552 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1553 if (sub_schurs->reuse_mumps) { 1554 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1555 1556 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1557 } 1558 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1559 if (!n_D) { 1560 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1561 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1562 } 1563 /* Set Up KSP for Dirichlet problem of BDDC */ 1564 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1565 /* set ksp_D into pcis data */ 1566 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1567 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1568 pcis->ksp_D = pcbddc->ksp_D; 1569 } 1570 1571 /* NEUMANN PROBLEM */ 1572 A_RR = 0; 1573 if (neumann) { 1574 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1575 PetscInt ibs,mbs; 1576 PetscBool issbaij; 1577 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1578 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1579 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1580 if (pcbddc->ksp_R) { /* already created ksp */ 1581 PetscInt nn_R; 1582 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1583 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1584 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1585 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1586 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1587 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1588 reuse = MAT_INITIAL_MATRIX; 1589 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1590 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1591 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1592 reuse = MAT_INITIAL_MATRIX; 1593 } else { /* safe to reuse the matrix */ 1594 reuse = MAT_REUSE_MATRIX; 1595 } 1596 } 1597 /* last check */ 1598 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1599 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1600 reuse = MAT_INITIAL_MATRIX; 1601 } 1602 } else { /* first time, so we need to create the matrix */ 1603 reuse = MAT_INITIAL_MATRIX; 1604 } 1605 /* extract A_RR */ 1606 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1607 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1608 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1609 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1610 if (matis->A == pcbddc->local_mat) { 1611 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1612 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1613 } else { 1614 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1615 } 1616 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1617 if (matis->A == pcbddc->local_mat) { 1618 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1619 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1620 } else { 1621 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1622 } 1623 } 1624 if (!sub_schurs->reuse_mumps) { 1625 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1626 if (pcbddc->local_mat->symmetric_set) { 1627 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1628 } 1629 } else { 1630 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1631 1632 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1633 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1634 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1635 } 1636 if (!pcbddc->ksp_R) { /* create object if not present */ 1637 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1638 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1639 /* default */ 1640 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1641 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1642 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1643 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1644 if (issbaij) { 1645 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1646 } else { 1647 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1648 } 1649 /* Allow user's customization */ 1650 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1651 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1652 } 1653 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1654 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1655 if (!n_R) { 1656 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1657 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1658 } 1659 /* Reuse MUMPS solver if it is present */ 1660 if (sub_schurs->reuse_mumps) { 1661 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1662 1663 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1664 } 1665 /* Set Up KSP for Neumann problem of BDDC */ 1666 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1667 } 1668 /* free Neumann problem's matrix */ 1669 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1670 1671 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1672 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1673 if (pcbddc->dbg_flag) { 1674 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1675 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1676 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1677 } 1678 if (dirichlet) { /* Dirichlet */ 1679 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1680 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1681 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1682 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1683 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1684 /* need to be adapted? */ 1685 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1686 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1687 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1688 /* print info */ 1689 if (pcbddc->dbg_flag) { 1690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1691 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1692 } 1693 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1694 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1695 } 1696 } 1697 if (neumann) { /* Neumann */ 1698 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1699 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1700 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1701 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1702 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1703 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1704 /* need to be adapted? */ 1705 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1706 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1707 /* print info */ 1708 if (pcbddc->dbg_flag) { 1709 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1710 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1711 } 1712 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1713 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1714 } 1715 } 1716 } 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1722 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1723 { 1724 PetscErrorCode ierr; 1725 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1726 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1727 1728 PetscFunctionBegin; 1729 if (!sub_schurs->reuse_mumps) { 1730 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1731 } 1732 if (!pcbddc->switch_static) { 1733 if (applytranspose && pcbddc->local_auxmat1) { 1734 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1735 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1736 } 1737 if (!sub_schurs->reuse_mumps) { 1738 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1739 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1740 } else { 1741 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1742 1743 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1744 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1745 } 1746 } else { 1747 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1748 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1749 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1750 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1751 if (applytranspose && pcbddc->local_auxmat1) { 1752 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1753 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1754 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1755 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1756 } 1757 } 1758 if (!sub_schurs->reuse_mumps) { 1759 if (applytranspose) { 1760 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1761 } else { 1762 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1763 } 1764 #if defined(PETSC_HAVE_MUMPS) 1765 } else { 1766 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1767 1768 if (applytranspose) { 1769 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1770 } else { 1771 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1772 } 1773 #endif 1774 } 1775 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1776 if (!pcbddc->switch_static) { 1777 if (!sub_schurs->reuse_mumps) { 1778 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1779 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1780 } else { 1781 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1782 1783 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1784 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1785 } 1786 if (!applytranspose && pcbddc->local_auxmat1) { 1787 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1788 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1789 } 1790 } else { 1791 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1792 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1793 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1794 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1795 if (!applytranspose && pcbddc->local_auxmat1) { 1796 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1797 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1798 } 1799 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1800 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1801 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1802 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1803 } 1804 PetscFunctionReturn(0); 1805 } 1806 1807 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1810 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1811 { 1812 PetscErrorCode ierr; 1813 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1814 PC_IS* pcis = (PC_IS*) (pc->data); 1815 const PetscScalar zero = 0.0; 1816 1817 PetscFunctionBegin; 1818 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1819 if (applytranspose) { 1820 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1821 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1822 } else { 1823 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1824 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1825 } 1826 /* start communications from local primal nodes to rhs of coarse solver */ 1827 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1828 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1829 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1830 1831 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1832 /* TODO remove null space when doing multilevel */ 1833 if (pcbddc->coarse_ksp) { 1834 Vec rhs,sol; 1835 1836 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1837 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1838 if (applytranspose) { 1839 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1840 } else { 1841 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1842 } 1843 } 1844 1845 /* Local solution on R nodes */ 1846 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1847 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1848 } 1849 1850 /* communications from coarse sol to local primal nodes */ 1851 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1852 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1853 1854 /* Sum contributions from two levels */ 1855 if (applytranspose) { 1856 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1857 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1858 } else { 1859 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1860 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1861 } 1862 PetscFunctionReturn(0); 1863 } 1864 1865 #undef __FUNCT__ 1866 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1867 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1868 { 1869 PetscErrorCode ierr; 1870 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1871 PetscScalar *array; 1872 Vec from,to; 1873 1874 PetscFunctionBegin; 1875 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1876 from = pcbddc->coarse_vec; 1877 to = pcbddc->vec1_P; 1878 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1879 Vec tvec; 1880 1881 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1882 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1883 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1884 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1885 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1886 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1887 } 1888 } else { /* from local to global -> put data in coarse right hand side */ 1889 from = pcbddc->vec1_P; 1890 to = pcbddc->coarse_vec; 1891 } 1892 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1893 PetscFunctionReturn(0); 1894 } 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1898 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1899 { 1900 PetscErrorCode ierr; 1901 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1902 PetscScalar *array; 1903 Vec from,to; 1904 1905 PetscFunctionBegin; 1906 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1907 from = pcbddc->coarse_vec; 1908 to = pcbddc->vec1_P; 1909 } else { /* from local to global -> put data in coarse right hand side */ 1910 from = pcbddc->vec1_P; 1911 to = pcbddc->coarse_vec; 1912 } 1913 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1914 if (smode == SCATTER_FORWARD) { 1915 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1916 Vec tvec; 1917 1918 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1919 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1920 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1921 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1922 } 1923 } else { 1924 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1925 ierr = VecResetArray(from);CHKERRQ(ierr); 1926 } 1927 } 1928 PetscFunctionReturn(0); 1929 } 1930 1931 /* uncomment for testing purposes */ 1932 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1933 #undef __FUNCT__ 1934 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1935 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1936 { 1937 PetscErrorCode ierr; 1938 PC_IS* pcis = (PC_IS*)(pc->data); 1939 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1940 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1941 /* one and zero */ 1942 PetscScalar one=1.0,zero=0.0; 1943 /* space to store constraints and their local indices */ 1944 PetscScalar *constraints_data; 1945 PetscInt *constraints_idxs,*constraints_idxs_B; 1946 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1947 PetscInt *constraints_n; 1948 /* iterators */ 1949 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1950 /* BLAS integers */ 1951 PetscBLASInt lwork,lierr; 1952 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1953 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1954 /* reuse */ 1955 PetscInt olocal_primal_size,olocal_primal_size_cc; 1956 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1957 /* change of basis */ 1958 PetscBool qr_needed; 1959 PetscBT change_basis,qr_needed_idx; 1960 /* auxiliary stuff */ 1961 PetscInt *nnz,*is_indices; 1962 PetscInt ncc; 1963 /* some quantities */ 1964 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1965 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1966 1967 PetscFunctionBegin; 1968 /* Destroy Mat objects computed previously */ 1969 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1970 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1971 /* save info on constraints from previous setup (if any) */ 1972 olocal_primal_size = pcbddc->local_primal_size; 1973 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1974 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1975 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1976 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1977 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1978 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1979 1980 /* print some info */ 1981 if (pcbddc->dbg_flag) { 1982 IS vertices; 1983 PetscInt nv,nedges,nfaces; 1984 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1985 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1986 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1987 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1988 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1989 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1990 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1991 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1992 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1993 } 1994 1995 if (!pcbddc->adaptive_selection) { 1996 IS ISForVertices,*ISForFaces,*ISForEdges; 1997 MatNullSpace nearnullsp; 1998 const Vec *nearnullvecs; 1999 Vec *localnearnullsp; 2000 PetscScalar *array; 2001 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2002 PetscBool nnsp_has_cnst; 2003 /* LAPACK working arrays for SVD or POD */ 2004 PetscBool skip_lapack,boolforchange; 2005 PetscScalar *work; 2006 PetscReal *singular_vals; 2007 #if defined(PETSC_USE_COMPLEX) 2008 PetscReal *rwork; 2009 #endif 2010 #if defined(PETSC_MISSING_LAPACK_GESVD) 2011 PetscScalar *temp_basis,*correlation_mat; 2012 #else 2013 PetscBLASInt dummy_int=1; 2014 PetscScalar dummy_scalar=1.; 2015 #endif 2016 2017 /* Get index sets for faces, edges and vertices from graph */ 2018 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2019 /* free unneeded index sets */ 2020 if (!pcbddc->use_vertices) { 2021 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2022 } 2023 if (!pcbddc->use_edges) { 2024 for (i=0;i<n_ISForEdges;i++) { 2025 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2026 } 2027 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2028 n_ISForEdges = 0; 2029 } 2030 if (!pcbddc->use_faces) { 2031 for (i=0;i<n_ISForFaces;i++) { 2032 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2033 } 2034 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2035 n_ISForFaces = 0; 2036 } 2037 2038 #if defined(PETSC_USE_DEBUG) 2039 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2040 Also use_change_of_basis should be consistent among processors */ 2041 if (pcbddc->NullSpace) { 2042 PetscBool tbool[2],gbool[2]; 2043 2044 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2045 pcbddc->use_change_of_basis = PETSC_TRUE; 2046 if (!ISForEdges) { 2047 pcbddc->use_change_on_faces = PETSC_TRUE; 2048 } 2049 } 2050 tbool[0] = pcbddc->use_change_of_basis; 2051 tbool[1] = pcbddc->use_change_on_faces; 2052 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2053 pcbddc->use_change_of_basis = gbool[0]; 2054 pcbddc->use_change_on_faces = gbool[1]; 2055 } 2056 #endif 2057 2058 /* check if near null space is attached to global mat */ 2059 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2060 if (nearnullsp) { 2061 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2062 /* remove any stored info */ 2063 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2064 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2065 /* store information for BDDC solver reuse */ 2066 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2067 pcbddc->onearnullspace = nearnullsp; 2068 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2069 for (i=0;i<nnsp_size;i++) { 2070 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2071 } 2072 } else { /* if near null space is not provided BDDC uses constants by default */ 2073 nnsp_size = 0; 2074 nnsp_has_cnst = PETSC_TRUE; 2075 } 2076 /* get max number of constraints on a single cc */ 2077 max_constraints = nnsp_size; 2078 if (nnsp_has_cnst) max_constraints++; 2079 2080 /* 2081 Evaluate maximum storage size needed by the procedure 2082 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2083 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2084 There can be multiple constraints per connected component 2085 */ 2086 n_vertices = 0; 2087 if (ISForVertices) { 2088 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2089 } 2090 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2091 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2092 2093 total_counts = n_ISForFaces+n_ISForEdges; 2094 total_counts *= max_constraints; 2095 total_counts += n_vertices; 2096 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2097 2098 total_counts = 0; 2099 max_size_of_constraint = 0; 2100 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2101 IS used_is; 2102 if (i<n_ISForEdges) { 2103 used_is = ISForEdges[i]; 2104 } else { 2105 used_is = ISForFaces[i-n_ISForEdges]; 2106 } 2107 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2108 total_counts += j; 2109 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2110 } 2111 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2112 2113 /* get local part of global near null space vectors */ 2114 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2115 for (k=0;k<nnsp_size;k++) { 2116 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2117 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2118 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2119 } 2120 2121 /* whether or not to skip lapack calls */ 2122 skip_lapack = PETSC_TRUE; 2123 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2124 2125 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2126 if (!skip_lapack) { 2127 PetscScalar temp_work; 2128 2129 #if defined(PETSC_MISSING_LAPACK_GESVD) 2130 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2131 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2132 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2133 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2134 #if defined(PETSC_USE_COMPLEX) 2135 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2136 #endif 2137 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2138 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2139 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2140 lwork = -1; 2141 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2142 #if !defined(PETSC_USE_COMPLEX) 2143 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2144 #else 2145 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2146 #endif 2147 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2148 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2149 #else /* on missing GESVD */ 2150 /* SVD */ 2151 PetscInt max_n,min_n; 2152 max_n = max_size_of_constraint; 2153 min_n = max_constraints; 2154 if (max_size_of_constraint < max_constraints) { 2155 min_n = max_size_of_constraint; 2156 max_n = max_constraints; 2157 } 2158 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2159 #if defined(PETSC_USE_COMPLEX) 2160 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2161 #endif 2162 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2163 lwork = -1; 2164 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2165 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2166 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2167 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2168 #if !defined(PETSC_USE_COMPLEX) 2169 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2170 #else 2171 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2172 #endif 2173 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2174 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2175 #endif /* on missing GESVD */ 2176 /* Allocate optimal workspace */ 2177 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2178 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2179 } 2180 /* Now we can loop on constraining sets */ 2181 total_counts = 0; 2182 constraints_idxs_ptr[0] = 0; 2183 constraints_data_ptr[0] = 0; 2184 /* vertices */ 2185 if (n_vertices) { 2186 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2187 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2188 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2189 for (i=0;i<n_vertices;i++) { 2190 constraints_n[total_counts] = 1; 2191 constraints_data[total_counts] = 1.0; 2192 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2193 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2194 total_counts++; 2195 } 2196 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2197 PetscBool used_vertex; 2198 for (i=0;i<n_vertices;i++) { 2199 used_vertex = PETSC_FALSE; 2200 k = 0; 2201 while (!used_vertex && k<nnsp_size) { 2202 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2203 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2204 constraints_n[total_counts] = 1; 2205 constraints_idxs[total_counts] = is_indices[i]; 2206 constraints_data[total_counts] = 1.0; 2207 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2208 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2209 total_counts++; 2210 used_vertex = PETSC_TRUE; 2211 } 2212 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2213 k++; 2214 } 2215 } 2216 } 2217 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2218 n_vertices = total_counts; 2219 } 2220 2221 /* edges and faces */ 2222 total_counts_cc = total_counts; 2223 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2224 IS used_is; 2225 PetscBool idxs_copied = PETSC_FALSE; 2226 2227 if (ncc<n_ISForEdges) { 2228 used_is = ISForEdges[ncc]; 2229 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2230 } else { 2231 used_is = ISForFaces[ncc-n_ISForEdges]; 2232 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2233 } 2234 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2235 2236 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2237 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2238 /* change of basis should not be performed on local periodic nodes */ 2239 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2240 if (nnsp_has_cnst) { 2241 PetscScalar quad_value; 2242 2243 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2244 idxs_copied = PETSC_TRUE; 2245 2246 if (!pcbddc->use_nnsp_true) { 2247 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2248 } else { 2249 quad_value = 1.0; 2250 } 2251 for (j=0;j<size_of_constraint;j++) { 2252 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2253 } 2254 temp_constraints++; 2255 total_counts++; 2256 } 2257 for (k=0;k<nnsp_size;k++) { 2258 PetscReal real_value; 2259 PetscScalar *ptr_to_data; 2260 2261 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2262 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2263 for (j=0;j<size_of_constraint;j++) { 2264 ptr_to_data[j] = array[is_indices[j]]; 2265 } 2266 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2267 /* check if array is null on the connected component */ 2268 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2269 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2270 if (real_value > 0.0) { /* keep indices and values */ 2271 temp_constraints++; 2272 total_counts++; 2273 if (!idxs_copied) { 2274 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2275 idxs_copied = PETSC_TRUE; 2276 } 2277 } 2278 } 2279 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2280 valid_constraints = temp_constraints; 2281 if (!pcbddc->use_nnsp_true && temp_constraints) { 2282 if (temp_constraints == 1) { /* just normalize the constraint */ 2283 PetscScalar norm,*ptr_to_data; 2284 2285 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2286 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2287 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2288 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2289 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2290 } else { /* perform SVD */ 2291 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2292 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2293 2294 #if defined(PETSC_MISSING_LAPACK_GESVD) 2295 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2296 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2297 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2298 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2299 from that computed using LAPACKgesvd 2300 -> This is due to a different computation of eigenvectors in LAPACKheev 2301 -> The quality of the POD-computed basis will be the same */ 2302 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2303 /* Store upper triangular part of correlation matrix */ 2304 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2305 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2306 for (j=0;j<temp_constraints;j++) { 2307 for (k=0;k<j+1;k++) { 2308 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2309 } 2310 } 2311 /* compute eigenvalues and eigenvectors of correlation matrix */ 2312 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2313 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2314 #if !defined(PETSC_USE_COMPLEX) 2315 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2316 #else 2317 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2318 #endif 2319 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2320 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2321 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2322 j = 0; 2323 while (j < temp_constraints && singular_vals[j] < tol) j++; 2324 total_counts = total_counts-j; 2325 valid_constraints = temp_constraints-j; 2326 /* scale and copy POD basis into used quadrature memory */ 2327 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2328 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2329 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2330 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2331 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2332 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2333 if (j<temp_constraints) { 2334 PetscInt ii; 2335 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2336 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2337 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2338 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2339 for (k=0;k<temp_constraints-j;k++) { 2340 for (ii=0;ii<size_of_constraint;ii++) { 2341 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2342 } 2343 } 2344 } 2345 #else /* on missing GESVD */ 2346 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2347 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2348 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2349 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2350 #if !defined(PETSC_USE_COMPLEX) 2351 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2352 #else 2353 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2354 #endif 2355 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2356 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2357 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2358 k = temp_constraints; 2359 if (k > size_of_constraint) k = size_of_constraint; 2360 j = 0; 2361 while (j < k && singular_vals[k-j-1] < tol) j++; 2362 valid_constraints = k-j; 2363 total_counts = total_counts-temp_constraints+valid_constraints; 2364 #endif /* on missing GESVD */ 2365 } 2366 } 2367 /* update pointers information */ 2368 if (valid_constraints) { 2369 constraints_n[total_counts_cc] = valid_constraints; 2370 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2371 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2372 /* set change_of_basis flag */ 2373 if (boolforchange) { 2374 PetscBTSet(change_basis,total_counts_cc); 2375 } 2376 total_counts_cc++; 2377 } 2378 } 2379 /* free workspace */ 2380 if (!skip_lapack) { 2381 ierr = PetscFree(work);CHKERRQ(ierr); 2382 #if defined(PETSC_USE_COMPLEX) 2383 ierr = PetscFree(rwork);CHKERRQ(ierr); 2384 #endif 2385 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2386 #if defined(PETSC_MISSING_LAPACK_GESVD) 2387 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2388 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2389 #endif 2390 } 2391 for (k=0;k<nnsp_size;k++) { 2392 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2393 } 2394 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2395 /* free index sets of faces, edges and vertices */ 2396 for (i=0;i<n_ISForFaces;i++) { 2397 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2398 } 2399 if (n_ISForFaces) { 2400 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2401 } 2402 for (i=0;i<n_ISForEdges;i++) { 2403 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2404 } 2405 if (n_ISForEdges) { 2406 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2407 } 2408 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2409 } else { 2410 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2411 2412 total_counts = 0; 2413 n_vertices = 0; 2414 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2415 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2416 } 2417 max_constraints = 0; 2418 total_counts_cc = 0; 2419 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2420 total_counts += pcbddc->adaptive_constraints_n[i]; 2421 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2422 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2423 } 2424 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2425 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2426 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2427 constraints_data = pcbddc->adaptive_constraints_data; 2428 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2429 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2430 total_counts_cc = 0; 2431 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2432 if (pcbddc->adaptive_constraints_n[i]) { 2433 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2434 } 2435 } 2436 #if 0 2437 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2438 for (i=0;i<total_counts_cc;i++) { 2439 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2440 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2441 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2442 printf(" %d",constraints_idxs[j]); 2443 } 2444 printf("\n"); 2445 printf("number of cc: %d\n",constraints_n[i]); 2446 } 2447 for (i=0;i<n_vertices;i++) { 2448 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2449 } 2450 for (i=0;i<sub_schurs->n_subs;i++) { 2451 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2452 } 2453 #endif 2454 2455 max_size_of_constraint = 0; 2456 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2457 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2458 /* Change of basis */ 2459 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2460 if (pcbddc->use_change_of_basis) { 2461 for (i=0;i<sub_schurs->n_subs;i++) { 2462 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2463 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2464 } 2465 } 2466 } 2467 } 2468 pcbddc->local_primal_size = total_counts; 2469 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2470 2471 /* map constraints_idxs in boundary numbering */ 2472 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2473 if (i != constraints_idxs_ptr[total_counts_cc]) { 2474 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2475 } 2476 2477 /* Create constraint matrix */ 2478 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2479 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2480 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2481 2482 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2483 /* determine if a QR strategy is needed for change of basis */ 2484 qr_needed = PETSC_FALSE; 2485 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2486 total_primal_vertices=0; 2487 pcbddc->local_primal_size_cc = 0; 2488 for (i=0;i<total_counts_cc;i++) { 2489 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2490 if (size_of_constraint == 1) { 2491 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2492 pcbddc->local_primal_size_cc += 1; 2493 } else if (PetscBTLookup(change_basis,i)) { 2494 for (k=0;k<constraints_n[i];k++) { 2495 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2496 } 2497 pcbddc->local_primal_size_cc += constraints_n[i]; 2498 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2499 PetscBTSet(qr_needed_idx,i); 2500 qr_needed = PETSC_TRUE; 2501 } 2502 } else { 2503 pcbddc->local_primal_size_cc += 1; 2504 } 2505 } 2506 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2507 pcbddc->n_vertices = total_primal_vertices; 2508 /* permute indices in order to have a sorted set of vertices */ 2509 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2510 2511 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2512 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2513 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2514 2515 /* nonzero structure of constraint matrix */ 2516 /* and get reference dof for local constraints */ 2517 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2518 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2519 2520 j = total_primal_vertices; 2521 total_counts = total_primal_vertices; 2522 cum = total_primal_vertices; 2523 for (i=n_vertices;i<total_counts_cc;i++) { 2524 if (!PetscBTLookup(change_basis,i)) { 2525 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2526 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2527 cum++; 2528 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2529 for (k=0;k<constraints_n[i];k++) { 2530 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2531 nnz[j+k] = size_of_constraint; 2532 } 2533 j += constraints_n[i]; 2534 } 2535 } 2536 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2537 ierr = PetscFree(nnz);CHKERRQ(ierr); 2538 2539 /* set values in constraint matrix */ 2540 for (i=0;i<total_primal_vertices;i++) { 2541 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2542 } 2543 total_counts = total_primal_vertices; 2544 for (i=n_vertices;i<total_counts_cc;i++) { 2545 if (!PetscBTLookup(change_basis,i)) { 2546 PetscInt *cols; 2547 2548 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2549 cols = constraints_idxs+constraints_idxs_ptr[i]; 2550 for (k=0;k<constraints_n[i];k++) { 2551 PetscInt row = total_counts+k; 2552 PetscScalar *vals; 2553 2554 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2555 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2556 } 2557 total_counts += constraints_n[i]; 2558 } 2559 } 2560 /* assembling */ 2561 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2562 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2563 2564 /* 2565 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2566 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2567 */ 2568 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2569 if (pcbddc->use_change_of_basis) { 2570 /* dual and primal dofs on a single cc */ 2571 PetscInt dual_dofs,primal_dofs; 2572 /* working stuff for GEQRF */ 2573 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2574 PetscBLASInt lqr_work; 2575 /* working stuff for UNGQR */ 2576 PetscScalar *gqr_work,lgqr_work_t; 2577 PetscBLASInt lgqr_work; 2578 /* working stuff for TRTRS */ 2579 PetscScalar *trs_rhs; 2580 PetscBLASInt Blas_NRHS; 2581 /* pointers for values insertion into change of basis matrix */ 2582 PetscInt *start_rows,*start_cols; 2583 PetscScalar *start_vals; 2584 /* working stuff for values insertion */ 2585 PetscBT is_primal; 2586 PetscInt *aux_primal_numbering_B; 2587 /* matrix sizes */ 2588 PetscInt global_size,local_size; 2589 /* temporary change of basis */ 2590 Mat localChangeOfBasisMatrix; 2591 /* extra space for debugging */ 2592 PetscScalar *dbg_work; 2593 2594 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2595 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2596 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2597 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2598 /* nonzeros for local mat */ 2599 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2600 for (i=0;i<pcis->n;i++) nnz[i]=1; 2601 for (i=n_vertices;i<total_counts_cc;i++) { 2602 if (PetscBTLookup(change_basis,i)) { 2603 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2604 if (PetscBTLookup(qr_needed_idx,i)) { 2605 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2606 } else { 2607 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2608 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2609 } 2610 } 2611 } 2612 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2613 ierr = PetscFree(nnz);CHKERRQ(ierr); 2614 /* Set initial identity in the matrix */ 2615 for (i=0;i<pcis->n;i++) { 2616 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2617 } 2618 2619 if (pcbddc->dbg_flag) { 2620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2622 } 2623 2624 2625 /* Now we loop on the constraints which need a change of basis */ 2626 /* 2627 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2628 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2629 2630 Basic blocks of change of basis matrix T computed by 2631 2632 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2633 2634 | 1 0 ... 0 s_1/S | 2635 | 0 1 ... 0 s_2/S | 2636 | ... | 2637 | 0 ... 1 s_{n-1}/S | 2638 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2639 2640 with S = \sum_{i=1}^n s_i^2 2641 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2642 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2643 2644 - QR decomposition of constraints otherwise 2645 */ 2646 if (qr_needed) { 2647 /* space to store Q */ 2648 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2649 /* first we issue queries for optimal work */ 2650 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2651 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2652 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2653 lqr_work = -1; 2654 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2655 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2656 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2657 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2658 lgqr_work = -1; 2659 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2660 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2661 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2662 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2663 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2664 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2665 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2666 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2667 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2668 /* array to store scaling factors for reflectors */ 2669 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2670 /* array to store rhs and solution of triangular solver */ 2671 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2672 /* allocating workspace for check */ 2673 if (pcbddc->dbg_flag) { 2674 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2675 } 2676 } 2677 /* array to store whether a node is primal or not */ 2678 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2679 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2680 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2681 if (i != total_primal_vertices) { 2682 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2683 } 2684 for (i=0;i<total_primal_vertices;i++) { 2685 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2686 } 2687 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2688 2689 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2690 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2691 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2692 if (PetscBTLookup(change_basis,total_counts)) { 2693 /* get constraint info */ 2694 primal_dofs = constraints_n[total_counts]; 2695 dual_dofs = size_of_constraint-primal_dofs; 2696 2697 if (pcbddc->dbg_flag) { 2698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2699 } 2700 2701 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2702 2703 /* copy quadrature constraints for change of basis check */ 2704 if (pcbddc->dbg_flag) { 2705 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2706 } 2707 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2708 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2709 2710 /* compute QR decomposition of constraints */ 2711 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2712 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2713 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2714 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2715 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2716 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2717 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2718 2719 /* explictly compute R^-T */ 2720 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2721 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2722 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2723 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2724 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2725 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2726 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2727 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2728 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2729 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2730 2731 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2732 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2733 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2734 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2735 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2736 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2737 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2738 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2739 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2740 2741 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2742 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2743 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2744 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2745 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2746 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2747 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2748 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2749 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2750 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2751 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2752 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2753 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2754 2755 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2756 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2757 /* insert cols for primal dofs */ 2758 for (j=0;j<primal_dofs;j++) { 2759 start_vals = &qr_basis[j*size_of_constraint]; 2760 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2761 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2762 } 2763 /* insert cols for dual dofs */ 2764 for (j=0,k=0;j<dual_dofs;k++) { 2765 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2766 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2767 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2768 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2769 j++; 2770 } 2771 } 2772 2773 /* check change of basis */ 2774 if (pcbddc->dbg_flag) { 2775 PetscInt ii,jj; 2776 PetscBool valid_qr=PETSC_TRUE; 2777 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2778 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2779 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2780 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2781 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2782 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2783 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2784 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2785 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2786 for (jj=0;jj<size_of_constraint;jj++) { 2787 for (ii=0;ii<primal_dofs;ii++) { 2788 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2789 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2790 } 2791 } 2792 if (!valid_qr) { 2793 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2794 for (jj=0;jj<size_of_constraint;jj++) { 2795 for (ii=0;ii<primal_dofs;ii++) { 2796 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2797 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2798 } 2799 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2800 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2801 } 2802 } 2803 } 2804 } else { 2805 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2806 } 2807 } 2808 } else { /* simple transformation block */ 2809 PetscInt row,col; 2810 PetscScalar val,norm; 2811 2812 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2813 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2814 for (j=0;j<size_of_constraint;j++) { 2815 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2816 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2817 if (!PetscBTLookup(is_primal,row_B)) { 2818 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2819 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2820 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2821 } else { 2822 for (k=0;k<size_of_constraint;k++) { 2823 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2824 if (row != col) { 2825 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2826 } else { 2827 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2828 } 2829 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2830 } 2831 } 2832 } 2833 if (pcbddc->dbg_flag) { 2834 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2835 } 2836 } 2837 } else { 2838 if (pcbddc->dbg_flag) { 2839 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2840 } 2841 } 2842 } 2843 2844 /* free workspace */ 2845 if (qr_needed) { 2846 if (pcbddc->dbg_flag) { 2847 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2848 } 2849 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2850 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2851 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2852 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2853 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2854 } 2855 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2856 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2857 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2858 2859 /* assembling of global change of variable */ 2860 { 2861 Mat tmat; 2862 PetscInt bs; 2863 2864 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2865 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2866 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2867 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2868 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2869 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2870 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2871 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2872 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2873 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2874 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2875 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2876 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2877 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2878 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2879 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2880 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2881 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2882 } 2883 /* check */ 2884 if (pcbddc->dbg_flag) { 2885 PetscReal error; 2886 Vec x,x_change; 2887 2888 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2889 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2890 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2891 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2892 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2893 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2894 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2895 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2896 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2897 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2898 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2899 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2900 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2901 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2902 ierr = VecDestroy(&x);CHKERRQ(ierr); 2903 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2904 } 2905 2906 /* adapt sub_schurs computed (if any) */ 2907 if (pcbddc->use_deluxe_scaling) { 2908 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2909 if (sub_schurs->S_Ej_all) { 2910 Mat S_new,tmat; 2911 IS is_all_N; 2912 2913 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2914 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2915 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2916 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2917 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2918 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2919 sub_schurs->S_Ej_all = S_new; 2920 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2921 if (sub_schurs->sum_S_Ej_all) { 2922 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2923 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2924 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2925 sub_schurs->sum_S_Ej_all = S_new; 2926 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2927 } 2928 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2929 } 2930 } 2931 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2932 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2933 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2934 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2935 } 2936 2937 /* set up change of basis context */ 2938 if (pcbddc->ChangeOfBasisMatrix) { 2939 PCBDDCChange_ctx change_ctx; 2940 2941 if (!pcbddc->new_global_mat) { 2942 PetscInt global_size,local_size; 2943 2944 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2945 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2946 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2947 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2948 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2949 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2950 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2951 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2952 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2953 } else { 2954 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2955 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2956 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2957 } 2958 if (!pcbddc->user_ChangeOfBasisMatrix) { 2959 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2960 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2961 } else { 2962 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2963 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2964 } 2965 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2966 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2967 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2968 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2969 } 2970 2971 /* check if a new primal space has been introduced */ 2972 pcbddc->new_primal_space_local = PETSC_TRUE; 2973 if (olocal_primal_size == pcbddc->local_primal_size) { 2974 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2975 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2976 if (!pcbddc->new_primal_space_local) { 2977 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2978 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2979 } 2980 } 2981 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2982 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2983 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2984 2985 /* flush dbg viewer */ 2986 if (pcbddc->dbg_flag) { 2987 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2988 } 2989 2990 /* free workspace */ 2991 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2992 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2993 if (!pcbddc->adaptive_selection) { 2994 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2995 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2996 } else { 2997 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2998 pcbddc->adaptive_constraints_idxs_ptr, 2999 pcbddc->adaptive_constraints_data_ptr, 3000 pcbddc->adaptive_constraints_idxs, 3001 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3002 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3003 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3004 } 3005 PetscFunctionReturn(0); 3006 } 3007 3008 #undef __FUNCT__ 3009 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3010 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3011 { 3012 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3013 PC_IS *pcis = (PC_IS*)pc->data; 3014 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3015 PetscInt ierr,i,vertex_size,N; 3016 PetscViewer viewer=pcbddc->dbg_viewer; 3017 3018 PetscFunctionBegin; 3019 /* Reset previously computed graph */ 3020 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3021 /* Init local Graph struct */ 3022 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3023 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 3024 3025 /* Check validity of the csr graph passed in by the user */ 3026 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3027 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3028 } 3029 3030 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3031 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3032 PetscInt *xadj,*adjncy; 3033 PetscInt nvtxs; 3034 PetscBool flg_row=PETSC_FALSE; 3035 3036 if (pcbddc->use_local_adj) { 3037 3038 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3039 if (flg_row) { 3040 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3041 pcbddc->computed_rowadj = PETSC_TRUE; 3042 } 3043 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3044 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3045 IS is_dummy; 3046 ISLocalToGlobalMapping l2gmap_dummy; 3047 PetscInt j,sum; 3048 PetscInt *cxadj,*cadjncy; 3049 const PetscInt *idxs; 3050 PCBDDCGraph graph; 3051 PetscBT is_on_boundary; 3052 3053 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3054 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3055 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3056 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3057 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3058 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3059 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3060 if (flg_row) { 3061 graph->xadj = xadj; 3062 graph->adjncy = adjncy; 3063 } 3064 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3065 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3066 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3067 3068 if (pcbddc->dbg_flag) { 3069 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3070 for (i=0;i<graph->ncc;i++) { 3071 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3072 } 3073 } 3074 3075 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3076 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3077 for (i=0;i<pcis->n_B;i++) { 3078 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3079 } 3080 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3081 3082 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3083 sum = 0; 3084 for (i=0;i<graph->ncc;i++) { 3085 PetscInt sizecc = 0; 3086 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3087 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3088 sizecc++; 3089 } 3090 } 3091 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3092 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3093 cxadj[graph->queue[j]] = sizecc; 3094 } 3095 } 3096 sum += sizecc*sizecc; 3097 } 3098 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3099 sum = 0; 3100 for (i=0;i<pcis->n;i++) { 3101 PetscInt temp = cxadj[i]; 3102 cxadj[i] = sum; 3103 sum += temp; 3104 } 3105 cxadj[pcis->n] = sum; 3106 for (i=0;i<graph->ncc;i++) { 3107 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3108 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3109 PetscInt k,sizecc = 0; 3110 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3111 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3112 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3113 sizecc++; 3114 } 3115 } 3116 } 3117 } 3118 } 3119 if (sum) { 3120 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3121 } else { 3122 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3123 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3124 } 3125 graph->xadj = 0; 3126 graph->adjncy = 0; 3127 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3128 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3129 } 3130 } 3131 if (pcbddc->dbg_flag) { 3132 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3133 } 3134 3135 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3136 vertex_size = 1; 3137 if (pcbddc->user_provided_isfordofs) { 3138 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3139 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3140 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3141 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3142 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3143 } 3144 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3145 pcbddc->n_ISForDofs = 0; 3146 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3147 } 3148 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3149 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3150 } else { 3151 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3152 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3153 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3154 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3155 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3156 } 3157 } 3158 } 3159 3160 /* Setup of Graph */ 3161 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3162 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3163 } 3164 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3165 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3166 } 3167 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3168 3169 /* Graph's connected components analysis */ 3170 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3171 3172 /* print some info to stdout */ 3173 if (pcbddc->dbg_flag) { 3174 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3175 } 3176 3177 /* mark topography has done */ 3178 pcbddc->recompute_topography = PETSC_FALSE; 3179 PetscFunctionReturn(0); 3180 } 3181 3182 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3183 #undef __FUNCT__ 3184 #define __FUNCT__ "PCBDDCSubsetNumbering" 3185 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3186 { 3187 PetscSF sf; 3188 PetscLayout map; 3189 const PetscInt *idxs; 3190 PetscInt *leaf_data,*root_data,*gidxs; 3191 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3192 PetscInt n_n,nlocals,start,first_index; 3193 PetscMPIInt commsize; 3194 PetscBool first_found; 3195 PetscErrorCode ierr; 3196 3197 PetscFunctionBegin; 3198 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3199 if (subset_mult) { 3200 PetscCheckSameComm(subset,1,subset_mult,2); 3201 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3202 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3203 } 3204 /* create workspace layout for computing global indices of subset */ 3205 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3206 lbounds[0] = lbounds[1] = 0; 3207 for (i=0;i<n;i++) { 3208 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3209 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3210 } 3211 lbounds[0] = -lbounds[0]; 3212 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3213 gbounds[0] = -gbounds[0]; 3214 N = gbounds[1] - gbounds[0] + 1; 3215 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3216 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3217 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3218 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3219 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3220 3221 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3222 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3223 if (subset_mult) { 3224 const PetscInt* idxs_mult; 3225 3226 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3227 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3228 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3229 } else { 3230 for (i=0;i<n;i++) leaf_data[i] = 1; 3231 } 3232 /* local size of new subset */ 3233 n_n = 0; 3234 for (i=0;i<n;i++) n_n += leaf_data[i]; 3235 3236 /* global indexes in layout */ 3237 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3238 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3239 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3240 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3241 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3242 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3243 3244 /* reduce from leaves to roots */ 3245 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3246 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3247 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3248 3249 /* count indexes in local part of layout */ 3250 nlocals = 0; 3251 first_index = -1; 3252 first_found = PETSC_FALSE; 3253 for (i=0;i<Nl;i++) { 3254 if (!first_found && root_data[i]) { 3255 first_found = PETSC_TRUE; 3256 first_index = i; 3257 } 3258 nlocals += root_data[i]; 3259 } 3260 3261 /* cumulative of number of indexes and size of subset without holes */ 3262 #if defined(PETSC_HAVE_MPI_EXSCAN) 3263 start = 0; 3264 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3265 #else 3266 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3267 start = start-nlocals; 3268 #endif 3269 3270 if (N_n) { /* compute total size of new subset if requested */ 3271 *N_n = start + nlocals; 3272 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3273 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3274 } 3275 3276 /* adapt root data with cumulative */ 3277 if (first_found) { 3278 PetscInt old_index; 3279 3280 root_data[first_index] += start; 3281 old_index = first_index; 3282 for (i=first_index+1;i<Nl;i++) { 3283 if (root_data[i]) { 3284 root_data[i] += root_data[old_index]; 3285 old_index = i; 3286 } 3287 } 3288 } 3289 3290 /* from roots to leaves */ 3291 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3292 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3293 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3294 3295 /* create new IS with global indexes without holes */ 3296 if (subset_mult) { 3297 const PetscInt* idxs_mult; 3298 PetscInt cum; 3299 3300 cum = 0; 3301 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3302 for (i=0;i<n;i++) { 3303 PetscInt j; 3304 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3305 } 3306 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3307 } else { 3308 for (i=0;i<n;i++) { 3309 gidxs[i] = leaf_data[i]-1; 3310 } 3311 } 3312 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3313 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3314 PetscFunctionReturn(0); 3315 } 3316 3317 #undef __FUNCT__ 3318 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3319 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3320 { 3321 PetscInt i,j; 3322 PetscScalar *alphas; 3323 PetscErrorCode ierr; 3324 3325 PetscFunctionBegin; 3326 /* this implements stabilized Gram-Schmidt */ 3327 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3328 for (i=0;i<n;i++) { 3329 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3330 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3331 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3332 } 3333 ierr = PetscFree(alphas);CHKERRQ(ierr); 3334 PetscFunctionReturn(0); 3335 } 3336 3337 #undef __FUNCT__ 3338 #define __FUNCT__ "MatISGetSubassemblingPattern" 3339 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3340 { 3341 Mat_IS *matis; 3342 IS ranks_send_to; 3343 PetscInt n_neighs,*neighs,*n_shared,**shared; 3344 PetscMPIInt size,rank,color; 3345 PetscInt *xadj,*adjncy; 3346 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3347 PetscInt i,local_size,threshold=0; 3348 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3349 PetscSubcomm subcomm; 3350 PetscErrorCode ierr; 3351 3352 PetscFunctionBegin; 3353 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3354 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3355 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3356 3357 /* Get info on mapping */ 3358 matis = (Mat_IS*)(mat->data); 3359 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3360 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3361 3362 /* build local CSR graph of subdomains' connectivity */ 3363 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3364 xadj[0] = 0; 3365 xadj[1] = PetscMax(n_neighs-1,0); 3366 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3367 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3368 3369 if (threshold) { 3370 PetscInt xadj_count = 0; 3371 for (i=1;i<n_neighs;i++) { 3372 if (n_shared[i] > threshold) { 3373 adjncy[xadj_count] = neighs[i]; 3374 adjncy_wgt[xadj_count] = n_shared[i]; 3375 xadj_count++; 3376 } 3377 } 3378 xadj[1] = xadj_count; 3379 } else { 3380 if (xadj[1]) { 3381 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3382 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3383 } 3384 } 3385 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3386 if (use_square) { 3387 for (i=0;i<xadj[1];i++) { 3388 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3389 } 3390 } 3391 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3392 3393 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3394 3395 /* 3396 Restrict work on active processes only. 3397 */ 3398 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3399 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3400 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3401 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3402 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3403 if (color) { 3404 ierr = PetscFree(xadj);CHKERRQ(ierr); 3405 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3406 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3407 } else { 3408 Mat subdomain_adj; 3409 IS new_ranks,new_ranks_contig; 3410 MatPartitioning partitioner; 3411 PetscInt prank,rstart=0,rend=0; 3412 PetscInt *is_indices,*oldranks; 3413 PetscBool aggregate; 3414 3415 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3416 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3417 prank = rank; 3418 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3419 /* 3420 for (i=0;i<size;i++) { 3421 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3422 } 3423 */ 3424 for (i=0;i<xadj[1];i++) { 3425 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3426 } 3427 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3428 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3429 if (aggregate) { 3430 PetscInt lrows,row,ncols,*cols; 3431 PetscMPIInt nrank; 3432 PetscScalar *vals; 3433 3434 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3435 lrows = 0; 3436 if (nrank<redprocs) { 3437 lrows = size/redprocs; 3438 if (nrank<size%redprocs) lrows++; 3439 } 3440 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3441 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3442 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3443 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3444 row = nrank; 3445 ncols = xadj[1]-xadj[0]; 3446 cols = adjncy; 3447 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3448 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3449 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3450 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3451 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3452 ierr = PetscFree(xadj);CHKERRQ(ierr); 3453 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3454 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3455 ierr = PetscFree(vals);CHKERRQ(ierr); 3456 } else { 3457 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3458 } 3459 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3460 3461 /* Partition */ 3462 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3463 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3464 if (use_vwgt) { 3465 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3466 v_wgt[0] = local_size; 3467 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3468 } 3469 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3470 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3471 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3472 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3473 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3474 3475 /* renumber new_ranks to avoid "holes" in new set of processors */ 3476 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3477 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3478 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3479 if (!redprocs) { 3480 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3481 } else { 3482 PetscInt idxs[1]; 3483 PetscMPIInt tag; 3484 MPI_Request *reqs; 3485 3486 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3487 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3488 for (i=rstart;i<rend;i++) { 3489 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3490 } 3491 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3492 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3493 ierr = PetscFree(reqs);CHKERRQ(ierr); 3494 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3495 } 3496 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3497 /* clean up */ 3498 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3499 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3500 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3501 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3502 } 3503 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3504 3505 /* assemble parallel IS for sends */ 3506 i = 1; 3507 if (color) i=0; 3508 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3509 /* get back IS */ 3510 *is_sends = ranks_send_to; 3511 PetscFunctionReturn(0); 3512 } 3513 3514 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3515 3516 #undef __FUNCT__ 3517 #define __FUNCT__ "MatISSubassemble" 3518 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3519 { 3520 Mat local_mat; 3521 Mat_IS *matis; 3522 IS is_sends_internal; 3523 PetscInt rows,cols,new_local_rows; 3524 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3525 PetscBool ismatis,isdense,newisdense,destroy_mat; 3526 ISLocalToGlobalMapping l2gmap; 3527 PetscInt* l2gmap_indices; 3528 const PetscInt* is_indices; 3529 MatType new_local_type; 3530 /* buffers */ 3531 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3532 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3533 PetscInt *recv_buffer_idxs_local; 3534 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3535 /* MPI */ 3536 MPI_Comm comm,comm_n; 3537 PetscSubcomm subcomm; 3538 PetscMPIInt n_sends,n_recvs,commsize; 3539 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3540 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3541 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3542 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3543 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3544 PetscErrorCode ierr; 3545 3546 PetscFunctionBegin; 3547 /* TODO: add missing checks */ 3548 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3549 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3550 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3551 PetscValidLogicalCollectiveInt(mat,nis,7); 3552 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3553 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3554 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3555 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3556 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3557 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3558 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3559 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3560 PetscInt mrows,mcols,mnrows,mncols; 3561 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3562 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3563 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3564 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3565 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3566 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3567 } 3568 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3569 PetscValidLogicalCollectiveInt(mat,bs,0); 3570 /* prepare IS for sending if not provided */ 3571 if (!is_sends) { 3572 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3573 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3574 } else { 3575 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3576 is_sends_internal = is_sends; 3577 } 3578 3579 /* get pointer of MATIS data */ 3580 matis = (Mat_IS*)mat->data; 3581 3582 /* get comm */ 3583 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3584 3585 /* compute number of sends */ 3586 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3587 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3588 3589 /* compute number of receives */ 3590 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3591 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3592 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3593 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3594 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3595 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3596 ierr = PetscFree(iflags);CHKERRQ(ierr); 3597 3598 /* restrict comm if requested */ 3599 subcomm = 0; 3600 destroy_mat = PETSC_FALSE; 3601 if (restrict_comm) { 3602 PetscMPIInt color,subcommsize; 3603 3604 color = 0; 3605 if (restrict_full) { 3606 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3607 } else { 3608 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3609 } 3610 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3611 subcommsize = commsize - subcommsize; 3612 /* check if reuse has been requested */ 3613 if (reuse == MAT_REUSE_MATRIX) { 3614 if (*mat_n) { 3615 PetscMPIInt subcommsize2; 3616 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3617 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3618 comm_n = PetscObjectComm((PetscObject)*mat_n); 3619 } else { 3620 comm_n = PETSC_COMM_SELF; 3621 } 3622 } else { /* MAT_INITIAL_MATRIX */ 3623 PetscMPIInt rank; 3624 3625 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3626 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3627 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3628 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3629 comm_n = PetscSubcommChild(subcomm); 3630 } 3631 /* flag to destroy *mat_n if not significative */ 3632 if (color) destroy_mat = PETSC_TRUE; 3633 } else { 3634 comm_n = comm; 3635 } 3636 3637 /* prepare send/receive buffers */ 3638 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3639 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3640 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3641 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3642 if (nis) { 3643 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3644 } 3645 3646 /* Get data from local matrices */ 3647 if (!isdense) { 3648 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3649 /* TODO: See below some guidelines on how to prepare the local buffers */ 3650 /* 3651 send_buffer_vals should contain the raw values of the local matrix 3652 send_buffer_idxs should contain: 3653 - MatType_PRIVATE type 3654 - PetscInt size_of_l2gmap 3655 - PetscInt global_row_indices[size_of_l2gmap] 3656 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3657 */ 3658 } else { 3659 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3660 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3661 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3662 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3663 send_buffer_idxs[1] = i; 3664 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3665 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3666 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3667 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3668 for (i=0;i<n_sends;i++) { 3669 ilengths_vals[is_indices[i]] = len*len; 3670 ilengths_idxs[is_indices[i]] = len+2; 3671 } 3672 } 3673 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3674 /* additional is (if any) */ 3675 if (nis) { 3676 PetscMPIInt psum; 3677 PetscInt j; 3678 for (j=0,psum=0;j<nis;j++) { 3679 PetscInt plen; 3680 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3681 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3682 psum += len+1; /* indices + lenght */ 3683 } 3684 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3685 for (j=0,psum=0;j<nis;j++) { 3686 PetscInt plen; 3687 const PetscInt *is_array_idxs; 3688 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3689 send_buffer_idxs_is[psum] = plen; 3690 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3691 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3692 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3693 psum += plen+1; /* indices + lenght */ 3694 } 3695 for (i=0;i<n_sends;i++) { 3696 ilengths_idxs_is[is_indices[i]] = psum; 3697 } 3698 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3699 } 3700 3701 buf_size_idxs = 0; 3702 buf_size_vals = 0; 3703 buf_size_idxs_is = 0; 3704 for (i=0;i<n_recvs;i++) { 3705 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3706 buf_size_vals += (PetscInt)olengths_vals[i]; 3707 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3708 } 3709 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3710 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3711 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3712 3713 /* get new tags for clean communications */ 3714 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3715 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3716 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3717 3718 /* allocate for requests */ 3719 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3720 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3721 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3722 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3723 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3724 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3725 3726 /* communications */ 3727 ptr_idxs = recv_buffer_idxs; 3728 ptr_vals = recv_buffer_vals; 3729 ptr_idxs_is = recv_buffer_idxs_is; 3730 for (i=0;i<n_recvs;i++) { 3731 source_dest = onodes[i]; 3732 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3733 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3734 ptr_idxs += olengths_idxs[i]; 3735 ptr_vals += olengths_vals[i]; 3736 if (nis) { 3737 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3738 ptr_idxs_is += olengths_idxs_is[i]; 3739 } 3740 } 3741 for (i=0;i<n_sends;i++) { 3742 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3743 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3744 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3745 if (nis) { 3746 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3747 } 3748 } 3749 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3750 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3751 3752 /* assemble new l2g map */ 3753 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3754 ptr_idxs = recv_buffer_idxs; 3755 new_local_rows = 0; 3756 for (i=0;i<n_recvs;i++) { 3757 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3758 ptr_idxs += olengths_idxs[i]; 3759 } 3760 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3761 ptr_idxs = recv_buffer_idxs; 3762 new_local_rows = 0; 3763 for (i=0;i<n_recvs;i++) { 3764 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3765 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3766 ptr_idxs += olengths_idxs[i]; 3767 } 3768 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3769 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3770 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3771 3772 /* infer new local matrix type from received local matrices type */ 3773 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3774 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3775 if (n_recvs) { 3776 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3777 ptr_idxs = recv_buffer_idxs; 3778 for (i=0;i<n_recvs;i++) { 3779 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3780 new_local_type_private = MATAIJ_PRIVATE; 3781 break; 3782 } 3783 ptr_idxs += olengths_idxs[i]; 3784 } 3785 switch (new_local_type_private) { 3786 case MATDENSE_PRIVATE: 3787 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3788 new_local_type = MATSEQAIJ; 3789 bs = 1; 3790 } else { /* if I receive only 1 dense matrix */ 3791 new_local_type = MATSEQDENSE; 3792 bs = 1; 3793 } 3794 break; 3795 case MATAIJ_PRIVATE: 3796 new_local_type = MATSEQAIJ; 3797 bs = 1; 3798 break; 3799 case MATBAIJ_PRIVATE: 3800 new_local_type = MATSEQBAIJ; 3801 break; 3802 case MATSBAIJ_PRIVATE: 3803 new_local_type = MATSEQSBAIJ; 3804 break; 3805 default: 3806 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3807 break; 3808 } 3809 } else { /* by default, new_local_type is seqdense */ 3810 new_local_type = MATSEQDENSE; 3811 bs = 1; 3812 } 3813 3814 /* create MATIS object if needed */ 3815 if (reuse == MAT_INITIAL_MATRIX) { 3816 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3817 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3818 } else { 3819 /* it also destroys the local matrices */ 3820 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3821 } 3822 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3823 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3824 3825 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3826 3827 /* Global to local map of received indices */ 3828 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3829 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3830 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3831 3832 /* restore attributes -> type of incoming data and its size */ 3833 buf_size_idxs = 0; 3834 for (i=0;i<n_recvs;i++) { 3835 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3836 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3837 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3838 } 3839 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3840 3841 /* set preallocation */ 3842 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3843 if (!newisdense) { 3844 PetscInt *new_local_nnz=0; 3845 3846 ptr_vals = recv_buffer_vals; 3847 ptr_idxs = recv_buffer_idxs_local; 3848 if (n_recvs) { 3849 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3850 } 3851 for (i=0;i<n_recvs;i++) { 3852 PetscInt j; 3853 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3854 for (j=0;j<*(ptr_idxs+1);j++) { 3855 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3856 } 3857 } else { 3858 /* TODO */ 3859 } 3860 ptr_idxs += olengths_idxs[i]; 3861 } 3862 if (new_local_nnz) { 3863 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3864 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3865 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3866 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3867 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3868 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3869 } else { 3870 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3871 } 3872 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3873 } else { 3874 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3875 } 3876 3877 /* set values */ 3878 ptr_vals = recv_buffer_vals; 3879 ptr_idxs = recv_buffer_idxs_local; 3880 for (i=0;i<n_recvs;i++) { 3881 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3882 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3883 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3884 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3885 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3886 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3887 } else { 3888 /* TODO */ 3889 } 3890 ptr_idxs += olengths_idxs[i]; 3891 ptr_vals += olengths_vals[i]; 3892 } 3893 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3894 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3895 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3896 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3897 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3898 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3899 3900 #if 0 3901 if (!restrict_comm) { /* check */ 3902 Vec lvec,rvec; 3903 PetscReal infty_error; 3904 3905 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3906 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3907 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3908 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3909 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3910 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3911 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3912 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3913 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3914 } 3915 #endif 3916 3917 /* assemble new additional is (if any) */ 3918 if (nis) { 3919 PetscInt **temp_idxs,*count_is,j,psum; 3920 3921 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3922 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3923 ptr_idxs = recv_buffer_idxs_is; 3924 psum = 0; 3925 for (i=0;i<n_recvs;i++) { 3926 for (j=0;j<nis;j++) { 3927 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3928 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3929 psum += plen; 3930 ptr_idxs += plen+1; /* shift pointer to received data */ 3931 } 3932 } 3933 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3934 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3935 for (i=1;i<nis;i++) { 3936 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3937 } 3938 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3939 ptr_idxs = recv_buffer_idxs_is; 3940 for (i=0;i<n_recvs;i++) { 3941 for (j=0;j<nis;j++) { 3942 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3943 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3944 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3945 ptr_idxs += plen+1; /* shift pointer to received data */ 3946 } 3947 } 3948 for (i=0;i<nis;i++) { 3949 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3950 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3951 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3952 } 3953 ierr = PetscFree(count_is);CHKERRQ(ierr); 3954 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3955 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3956 } 3957 /* free workspace */ 3958 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3959 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3960 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3961 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3962 if (isdense) { 3963 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3964 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3965 } else { 3966 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3967 } 3968 if (nis) { 3969 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3970 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3971 } 3972 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3973 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3974 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3975 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3976 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3977 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3978 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3979 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3980 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3981 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3982 ierr = PetscFree(onodes);CHKERRQ(ierr); 3983 if (nis) { 3984 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3985 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3986 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3987 } 3988 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3989 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3990 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3991 for (i=0;i<nis;i++) { 3992 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3993 } 3994 *mat_n = NULL; 3995 } 3996 PetscFunctionReturn(0); 3997 } 3998 3999 /* temporary hack into ksp private data structure */ 4000 #include <petsc/private/kspimpl.h> 4001 4002 #undef __FUNCT__ 4003 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4004 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4005 { 4006 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4007 PC_IS *pcis = (PC_IS*)pc->data; 4008 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4009 MatNullSpace CoarseNullSpace=NULL; 4010 ISLocalToGlobalMapping coarse_islg; 4011 IS coarse_is,*isarray; 4012 PetscInt i,im_active=-1,active_procs=-1; 4013 PetscInt nis,nisdofs,nisneu; 4014 PC pc_temp; 4015 PCType coarse_pc_type; 4016 KSPType coarse_ksp_type; 4017 PetscBool multilevel_requested,multilevel_allowed; 4018 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4019 Mat t_coarse_mat_is; 4020 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4021 PetscMPIInt all_procs; 4022 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4023 PetscBool compute_vecs = PETSC_FALSE; 4024 PetscScalar *array; 4025 PetscErrorCode ierr; 4026 4027 PetscFunctionBegin; 4028 /* Assign global numbering to coarse dofs */ 4029 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4030 PetscInt ocoarse_size; 4031 compute_vecs = PETSC_TRUE; 4032 ocoarse_size = pcbddc->coarse_size; 4033 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4034 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4035 /* see if we can avoid some work */ 4036 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4037 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4038 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4039 PC pc; 4040 PetscBool isbddc; 4041 4042 /* temporary workaround since PCBDDC does not have a reset method so far */ 4043 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4044 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4045 if (isbddc) { 4046 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4047 } 4048 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4049 coarse_reuse = PETSC_FALSE; 4050 } else { /* we can safely reuse already computed coarse matrix */ 4051 coarse_reuse = PETSC_TRUE; 4052 } 4053 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4054 coarse_reuse = PETSC_FALSE; 4055 } 4056 /* reset any subassembling information */ 4057 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4058 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4059 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4060 coarse_reuse = PETSC_TRUE; 4061 } 4062 4063 /* count "active" (i.e. with positive local size) and "void" processes */ 4064 im_active = !!(pcis->n); 4065 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4066 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4067 void_procs = all_procs-active_procs; 4068 csin_type_simple = PETSC_TRUE; 4069 redist = PETSC_FALSE; 4070 if (pcbddc->current_level && void_procs) { 4071 csin_ml = PETSC_TRUE; 4072 ncoarse_ml = void_procs; 4073 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4074 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4075 csin_ds = PETSC_TRUE; 4076 ncoarse_ds = pcbddc->redistribute_coarse; 4077 redist = PETSC_TRUE; 4078 } else { 4079 csin_ds = PETSC_TRUE; 4080 ncoarse_ds = active_procs; 4081 redist = PETSC_TRUE; 4082 } 4083 } else { 4084 csin_ml = PETSC_FALSE; 4085 ncoarse_ml = all_procs; 4086 if (void_procs) { 4087 csin_ds = PETSC_TRUE; 4088 ncoarse_ds = void_procs; 4089 csin_type_simple = PETSC_FALSE; 4090 } else { 4091 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4092 csin_ds = PETSC_TRUE; 4093 ncoarse_ds = pcbddc->redistribute_coarse; 4094 redist = PETSC_TRUE; 4095 } else { 4096 csin_ds = PETSC_FALSE; 4097 ncoarse_ds = all_procs; 4098 } 4099 } 4100 } 4101 4102 /* 4103 test if we can go multilevel: three conditions must be satisfied: 4104 - we have not exceeded the number of levels requested 4105 - we can actually subassemble the active processes 4106 - we can find a suitable number of MPI processes where we can place the subassembled problem 4107 */ 4108 multilevel_allowed = PETSC_FALSE; 4109 multilevel_requested = PETSC_FALSE; 4110 if (pcbddc->current_level < pcbddc->max_levels) { 4111 multilevel_requested = PETSC_TRUE; 4112 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4113 multilevel_allowed = PETSC_FALSE; 4114 } else { 4115 multilevel_allowed = PETSC_TRUE; 4116 } 4117 } 4118 /* determine number of process partecipating to coarse solver */ 4119 if (multilevel_allowed) { 4120 ncoarse = ncoarse_ml; 4121 csin = csin_ml; 4122 redist = PETSC_FALSE; 4123 } else { 4124 ncoarse = ncoarse_ds; 4125 csin = csin_ds; 4126 } 4127 4128 /* creates temporary l2gmap and IS for coarse indexes */ 4129 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4130 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4131 4132 /* creates temporary MATIS object for coarse matrix */ 4133 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4134 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4135 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4136 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4137 #if 0 4138 { 4139 PetscViewer viewer; 4140 char filename[256]; 4141 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4142 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4143 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4144 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4146 } 4147 #endif 4148 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4149 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4150 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4151 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4152 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4153 4154 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4155 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4156 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4157 const PetscInt *idxs; 4158 ISLocalToGlobalMapping tmap; 4159 4160 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4161 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4162 /* allocate space for temporary storage */ 4163 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4164 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4165 /* allocate for IS array */ 4166 nisdofs = pcbddc->n_ISForDofsLocal; 4167 nisneu = !!pcbddc->NeumannBoundariesLocal; 4168 nis = nisdofs + nisneu; 4169 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4170 /* dofs splitting */ 4171 for (i=0;i<nisdofs;i++) { 4172 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4173 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4174 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4175 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4176 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4177 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4178 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4179 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4180 } 4181 /* neumann boundaries */ 4182 if (pcbddc->NeumannBoundariesLocal) { 4183 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4184 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4185 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4186 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4187 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4188 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4189 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4190 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4191 } 4192 /* free memory */ 4193 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4194 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4195 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4196 } else { 4197 nis = 0; 4198 nisdofs = 0; 4199 nisneu = 0; 4200 isarray = NULL; 4201 } 4202 /* destroy no longer needed map */ 4203 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4204 4205 /* restrict on coarse candidates (if needed) */ 4206 coarse_mat_is = NULL; 4207 if (csin) { 4208 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4209 if (redist) { 4210 PetscMPIInt rank; 4211 PetscInt spc,n_spc_p1,dest[1],destsize; 4212 4213 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4214 spc = active_procs/ncoarse; 4215 n_spc_p1 = active_procs%ncoarse; 4216 if (im_active) { 4217 destsize = 1; 4218 if (rank > n_spc_p1*(spc+1)-1) { 4219 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4220 } else { 4221 dest[0] = rank/(spc+1); 4222 } 4223 } else { 4224 destsize = 0; 4225 } 4226 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4227 } else if (csin_type_simple) { 4228 PetscMPIInt rank; 4229 PetscInt issize,isidx; 4230 4231 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4232 if (im_active) { 4233 issize = 1; 4234 isidx = (PetscInt)rank; 4235 } else { 4236 issize = 0; 4237 isidx = -1; 4238 } 4239 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4240 } else { /* get a suitable subassembling pattern from MATIS code */ 4241 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4242 } 4243 4244 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4245 if (!redist || ncoarse <= void_procs) { 4246 PetscInt ncoarse_cand,tissize,*nisindices; 4247 PetscInt *coarse_candidates; 4248 const PetscInt* tisindices; 4249 4250 /* get coarse candidates' ranks in pc communicator */ 4251 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4252 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4253 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4254 if (!coarse_candidates[i]) { 4255 coarse_candidates[ncoarse_cand++]=i; 4256 } 4257 } 4258 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4259 4260 4261 if (pcbddc->dbg_flag) { 4262 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4263 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4264 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4265 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4266 for (i=0;i<ncoarse_cand;i++) { 4267 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4268 } 4269 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4270 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4271 } 4272 /* shift the pattern on coarse candidates */ 4273 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4274 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4275 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4276 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4277 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4278 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4279 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4280 } 4281 if (pcbddc->dbg_flag) { 4282 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4283 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4284 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4285 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4286 } 4287 } 4288 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4289 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4290 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4291 } else { /* this is the last level, so use just receiving processes in subcomm */ 4292 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4293 } 4294 } else { 4295 if (pcbddc->dbg_flag) { 4296 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4297 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4298 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4299 } 4300 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4301 coarse_mat_is = t_coarse_mat_is; 4302 } 4303 4304 /* create local to global scatters for coarse problem */ 4305 if (compute_vecs) { 4306 PetscInt lrows; 4307 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4308 if (coarse_mat_is) { 4309 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4310 } else { 4311 lrows = 0; 4312 } 4313 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4314 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4315 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4316 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4317 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4318 } 4319 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4320 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4321 4322 /* set defaults for coarse KSP and PC */ 4323 if (multilevel_allowed) { 4324 coarse_ksp_type = KSPRICHARDSON; 4325 coarse_pc_type = PCBDDC; 4326 } else { 4327 coarse_ksp_type = KSPPREONLY; 4328 coarse_pc_type = PCREDUNDANT; 4329 } 4330 4331 /* print some info if requested */ 4332 if (pcbddc->dbg_flag) { 4333 if (!multilevel_allowed) { 4334 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4335 if (multilevel_requested) { 4336 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4337 } else if (pcbddc->max_levels) { 4338 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4339 } 4340 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4341 } 4342 } 4343 4344 /* create the coarse KSP object only once with defaults */ 4345 if (coarse_mat_is) { 4346 MatReuse coarse_mat_reuse; 4347 PetscViewer dbg_viewer = NULL; 4348 if (pcbddc->dbg_flag) { 4349 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4350 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4351 } 4352 if (!pcbddc->coarse_ksp) { 4353 char prefix[256],str_level[16]; 4354 size_t len; 4355 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4356 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4357 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4358 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4359 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4360 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4361 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4362 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4363 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4364 /* prefix */ 4365 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4366 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4367 if (!pcbddc->current_level) { 4368 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4369 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4370 } else { 4371 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4372 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4373 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4374 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4375 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4376 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4377 } 4378 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4379 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4380 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4381 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4382 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4383 /* allow user customization */ 4384 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4385 } 4386 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4387 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4388 if (nisdofs) { 4389 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4390 for (i=0;i<nisdofs;i++) { 4391 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4392 } 4393 } 4394 if (nisneu) { 4395 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4396 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4397 } 4398 4399 /* get some info after set from options */ 4400 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4401 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4402 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4403 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4404 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4405 isbddc = PETSC_FALSE; 4406 } 4407 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4408 if (isredundant) { 4409 KSP inner_ksp; 4410 PC inner_pc; 4411 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4412 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4413 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4414 } 4415 4416 /* assemble coarse matrix */ 4417 if (coarse_reuse) { 4418 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4419 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4420 coarse_mat_reuse = MAT_REUSE_MATRIX; 4421 } else { 4422 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4423 } 4424 if (isbddc || isnn) { 4425 if (pcbddc->coarsening_ratio > 1) { 4426 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4427 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4428 if (pcbddc->dbg_flag) { 4429 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4430 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4431 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4432 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4433 } 4434 } 4435 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4436 } else { 4437 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4438 coarse_mat = coarse_mat_is; 4439 } 4440 } else { 4441 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4442 } 4443 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4444 4445 /* propagate symmetry info of coarse matrix */ 4446 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4447 if (pc->pmat->symmetric_set) { 4448 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4449 } 4450 if (pc->pmat->hermitian_set) { 4451 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4452 } 4453 if (pc->pmat->spd_set) { 4454 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4455 } 4456 /* set operators */ 4457 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4458 if (pcbddc->dbg_flag) { 4459 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4460 } 4461 } else { /* processes non partecipating to coarse solver (if any) */ 4462 coarse_mat = 0; 4463 } 4464 ierr = PetscFree(isarray);CHKERRQ(ierr); 4465 #if 0 4466 { 4467 PetscViewer viewer; 4468 char filename[256]; 4469 sprintf(filename,"coarse_mat.m"); 4470 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4471 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4472 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4473 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4474 } 4475 #endif 4476 4477 /* Compute coarse null space (special handling by BDDC only) */ 4478 #if 0 4479 if (pcbddc->NullSpace) { 4480 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4481 } 4482 #endif 4483 4484 if (pcbddc->coarse_ksp) { 4485 Vec crhs,csol; 4486 PetscBool ispreonly; 4487 4488 if (CoarseNullSpace) { 4489 if (isbddc) { 4490 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4491 } else { 4492 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4493 } 4494 } 4495 /* setup coarse ksp */ 4496 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4497 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4498 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4499 /* hack */ 4500 if (!csol) { 4501 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4502 } 4503 if (!crhs) { 4504 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4505 } 4506 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4507 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4508 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4509 KSP check_ksp; 4510 KSPType check_ksp_type; 4511 PC check_pc; 4512 Vec check_vec,coarse_vec; 4513 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4514 PetscInt its; 4515 PetscBool compute_eigs; 4516 PetscReal *eigs_r,*eigs_c; 4517 PetscInt neigs; 4518 const char *prefix; 4519 4520 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4521 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4522 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4523 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4524 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4525 if (ispreonly) { 4526 check_ksp_type = KSPPREONLY; 4527 compute_eigs = PETSC_FALSE; 4528 } else { 4529 check_ksp_type = KSPGMRES; 4530 compute_eigs = PETSC_TRUE; 4531 } 4532 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4533 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4534 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4535 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4536 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4537 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4538 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4539 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4540 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4541 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4542 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4543 /* create random vec */ 4544 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4545 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4546 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4547 if (CoarseNullSpace) { 4548 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4549 } 4550 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4551 /* solve coarse problem */ 4552 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4553 if (CoarseNullSpace) { 4554 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4555 } 4556 /* set eigenvalue estimation if preonly has not been requested */ 4557 if (compute_eigs) { 4558 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4559 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4560 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4561 lambda_max = eigs_r[neigs-1]; 4562 lambda_min = eigs_r[0]; 4563 if (pcbddc->use_coarse_estimates) { 4564 if (lambda_max>lambda_min) { 4565 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4566 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4567 } 4568 } 4569 } 4570 4571 /* check coarse problem residual error */ 4572 if (pcbddc->dbg_flag) { 4573 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4574 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4575 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4576 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4577 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4578 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4579 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4580 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4581 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4582 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4583 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4584 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4585 if (compute_eigs) { 4586 PetscReal lambda_max_s,lambda_min_s; 4587 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4588 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4589 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4590 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4591 for (i=0;i<neigs;i++) { 4592 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4593 } 4594 } 4595 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4596 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4597 } 4598 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4599 if (compute_eigs) { 4600 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4601 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4602 } 4603 } 4604 } 4605 /* print additional info */ 4606 if (pcbddc->dbg_flag) { 4607 /* waits until all processes reaches this point */ 4608 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4609 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4610 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4611 } 4612 4613 /* free memory */ 4614 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4615 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4616 PetscFunctionReturn(0); 4617 } 4618 4619 #undef __FUNCT__ 4620 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4621 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4622 { 4623 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4624 PC_IS* pcis = (PC_IS*)pc->data; 4625 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4626 IS subset,subset_mult,subset_n; 4627 PetscInt local_size,coarse_size=0; 4628 PetscInt *local_primal_indices=NULL; 4629 const PetscInt *t_local_primal_indices; 4630 PetscErrorCode ierr; 4631 4632 PetscFunctionBegin; 4633 /* Compute global number of coarse dofs */ 4634 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4635 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4636 } 4637 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4638 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,subset_n,&subset);CHKERRQ(ierr); 4639 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4640 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4641 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4642 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4643 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4644 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4645 if (local_size != pcbddc->local_primal_size) { 4646 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4647 } 4648 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4649 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4650 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4651 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4652 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4653 4654 /* check numbering */ 4655 if (pcbddc->dbg_flag) { 4656 PetscScalar coarsesum,*array; 4657 PetscInt i; 4658 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4659 4660 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4661 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4662 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4663 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4664 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4665 for (i=0;i<pcbddc->local_primal_size;i++) { 4666 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4667 } 4668 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4669 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4670 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4671 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4672 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4673 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4674 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4675 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4676 for (i=0;i<pcis->n;i++) { 4677 if (array[i] == 1.0) { 4678 set_error = PETSC_TRUE; 4679 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4680 } 4681 } 4682 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4683 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4684 for (i=0;i<pcis->n;i++) { 4685 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4686 } 4687 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4688 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4689 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4690 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4691 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4692 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4693 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4694 PetscInt *gidxs; 4695 4696 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4697 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4698 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4699 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4701 for (i=0;i<pcbddc->local_primal_size;i++) { 4702 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4703 } 4704 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4705 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4706 } 4707 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4708 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4709 } 4710 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4711 /* get back data */ 4712 *coarse_size_n = coarse_size; 4713 *local_primal_indices_n = local_primal_indices; 4714 PetscFunctionReturn(0); 4715 } 4716 4717 #undef __FUNCT__ 4718 #define __FUNCT__ "PCBDDCGlobalToLocal" 4719 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4720 { 4721 IS localis_t; 4722 PetscInt i,lsize,*idxs,n; 4723 PetscScalar *vals; 4724 PetscErrorCode ierr; 4725 4726 PetscFunctionBegin; 4727 /* get indices in local ordering exploiting local to global map */ 4728 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4729 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4730 for (i=0;i<lsize;i++) vals[i] = 1.0; 4731 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4732 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4733 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4734 if (idxs) { /* multilevel guard */ 4735 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4736 } 4737 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4738 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4739 ierr = PetscFree(vals);CHKERRQ(ierr); 4740 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4741 /* now compute set in local ordering */ 4742 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4743 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4744 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4745 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4746 for (i=0,lsize=0;i<n;i++) { 4747 if (PetscRealPart(vals[i]) > 0.5) { 4748 lsize++; 4749 } 4750 } 4751 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4752 for (i=0,lsize=0;i<n;i++) { 4753 if (PetscRealPart(vals[i]) > 0.5) { 4754 idxs[lsize++] = i; 4755 } 4756 } 4757 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4758 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4759 *localis = localis_t; 4760 PetscFunctionReturn(0); 4761 } 4762 4763 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4764 #undef __FUNCT__ 4765 #define __FUNCT__ "PCBDDCMatMult_Private" 4766 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4767 { 4768 PCBDDCChange_ctx change_ctx; 4769 PetscErrorCode ierr; 4770 4771 PetscFunctionBegin; 4772 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4773 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4774 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4775 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4776 PetscFunctionReturn(0); 4777 } 4778 4779 #undef __FUNCT__ 4780 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4781 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4782 { 4783 PCBDDCChange_ctx change_ctx; 4784 PetscErrorCode ierr; 4785 4786 PetscFunctionBegin; 4787 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4788 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4789 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4790 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4791 PetscFunctionReturn(0); 4792 } 4793 4794 #undef __FUNCT__ 4795 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4796 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4797 { 4798 PC_IS *pcis=(PC_IS*)pc->data; 4799 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4800 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4801 Mat S_j; 4802 PetscInt *used_xadj,*used_adjncy; 4803 PetscBool free_used_adj; 4804 PetscErrorCode ierr; 4805 4806 PetscFunctionBegin; 4807 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4808 free_used_adj = PETSC_FALSE; 4809 if (pcbddc->sub_schurs_layers == -1) { 4810 used_xadj = NULL; 4811 used_adjncy = NULL; 4812 } else { 4813 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4814 used_xadj = pcbddc->mat_graph->xadj; 4815 used_adjncy = pcbddc->mat_graph->adjncy; 4816 } else if (pcbddc->computed_rowadj) { 4817 used_xadj = pcbddc->mat_graph->xadj; 4818 used_adjncy = pcbddc->mat_graph->adjncy; 4819 } else { 4820 PetscBool flg_row=PETSC_FALSE; 4821 const PetscInt *xadj,*adjncy; 4822 PetscInt nvtxs; 4823 4824 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4825 if (flg_row) { 4826 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4827 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4828 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4829 free_used_adj = PETSC_TRUE; 4830 } else { 4831 pcbddc->sub_schurs_layers = -1; 4832 used_xadj = NULL; 4833 used_adjncy = NULL; 4834 } 4835 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4836 } 4837 } 4838 4839 /* setup sub_schurs data */ 4840 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4841 if (!sub_schurs->use_mumps) { 4842 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4843 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4844 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4845 } else { 4846 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4847 PetscBool isseqaij; 4848 if (!pcbddc->use_vertices && reuse_solvers) { 4849 PetscInt n_vertices; 4850 4851 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4852 reuse_solvers = (PetscBool)!n_vertices; 4853 } 4854 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4855 if (!isseqaij) { 4856 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4857 if (matis->A == pcbddc->local_mat) { 4858 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4859 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4860 } else { 4861 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4862 } 4863 } 4864 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4865 } 4866 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4867 4868 /* free adjacency */ 4869 if (free_used_adj) { 4870 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4871 } 4872 PetscFunctionReturn(0); 4873 } 4874 4875 #undef __FUNCT__ 4876 #define __FUNCT__ "PCBDDCInitSubSchurs" 4877 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4878 { 4879 PC_IS *pcis=(PC_IS*)pc->data; 4880 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4881 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4882 PCBDDCGraph graph; 4883 PetscErrorCode ierr; 4884 4885 PetscFunctionBegin; 4886 /* attach interface graph for determining subsets */ 4887 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4888 IS verticesIS,verticescomm; 4889 PetscInt vsize,*idxs; 4890 4891 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4892 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4893 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4894 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4895 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4896 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4897 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4898 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4899 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4900 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4901 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4902 /* 4903 if (pcbddc->dbg_flag) { 4904 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4905 } 4906 */ 4907 } else { 4908 graph = pcbddc->mat_graph; 4909 } 4910 4911 /* sub_schurs init */ 4912 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4913 4914 /* free graph struct */ 4915 if (pcbddc->sub_schurs_rebuild) { 4916 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4917 } 4918 PetscFunctionReturn(0); 4919 } 4920