1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 PetscInt subset_size; 51 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 56 /* min/max and threshold */ 57 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 58 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 59 nmax = PetscMax(nmin,nmax); 60 allocated_S_St = PETSC_FALSE; 61 if (nmin) { 62 allocated_S_St = PETSC_TRUE; 63 } 64 65 /* allocate lapack workspace */ 66 cum = cum2 = 0; 67 maxneigs = 0; 68 for (i=0;i<sub_schurs->n_subs;i++) { 69 PetscInt n,subset_size; 70 71 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 72 n = PetscMin(subset_size,nmax); 73 cum += subset_size; 74 cum2 += subset_size*n; 75 maxneigs = PetscMax(maxneigs,n); 76 } 77 if (mss) { 78 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 79 PetscBLASInt B_itype = 1; 80 PetscBLASInt B_N = mss; 81 PetscReal zero = 0.0; 82 PetscReal eps = 0.0; /* dlamch? */ 83 84 B_lwork = -1; 85 S = NULL; 86 St = NULL; 87 eigs = NULL; 88 eigv = NULL; 89 B_iwork = NULL; 90 B_ifail = NULL; 91 #if defined(PETSC_USE_COMPLEX) 92 rwork = NULL; 93 #endif 94 thresh = 1.0; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 112 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 if (allocated_S_St) { 116 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 117 } 118 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 119 #if defined(PETSC_USE_COMPLEX) 120 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 121 #endif 122 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 124 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 125 nv+cum,&pcbddc->adaptive_constraints_idxs, 126 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 127 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 maxneigs = 0; 130 cum = cum2 = cumarray = 0; 131 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 132 pcbddc->adaptive_constraints_data_ptr[0] = 0; 133 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 134 const PetscInt *idxs; 135 136 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 137 for (cum=0;cum<nv;cum++) { 138 pcbddc->adaptive_constraints_n[cum] = 1; 139 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 140 pcbddc->adaptive_constraints_data[cum] = 1.0; 141 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 142 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 143 } 144 cum2 = cum; 145 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 146 } 147 148 if (mss) { /* multilevel */ 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 150 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 151 } 152 153 for (i=0;i<sub_schurs->n_subs;i++) { 154 155 const PetscInt *idxs; 156 PetscReal infty = PETSC_MAX_REAL; 157 PetscInt j,subset_size,eigs_start = 0; 158 PetscBLASInt B_N; 159 PetscBool same_data = PETSC_FALSE; 160 161 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 162 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 163 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 164 if (sub_schurs->is_hermitian) { 165 PetscInt j,k; 166 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 167 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 168 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 169 } 170 for (j=0;j<subset_size;j++) { 171 for (k=j;k<subset_size;k++) { 172 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 173 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 174 } 175 } 176 } else { 177 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 178 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 179 } 180 } else { 181 S = Sarray + cumarray; 182 St = Starray + cumarray; 183 } 184 185 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 186 /* see if we can save some work */ 187 if (sub_schurs->n_subs == 1) { 188 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 189 } 190 191 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 192 B_neigs = 0; 193 } else { 194 /* Threshold: this is an heuristic for edges */ 195 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 196 197 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 198 PetscBLASInt B_itype = 1; 199 PetscBLASInt B_IL, B_IU; 200 PetscReal eps = -1.0; /* dlamch? */ 201 PetscInt nmin_s; 202 203 /* ask for eigenvalues larger than thresh */ 204 if (pcbddc->dbg_flag) { 205 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 206 } 207 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 208 #if defined(PETSC_USE_COMPLEX) 209 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 210 #else 211 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 212 #endif 213 ierr = PetscFPTrapPop();CHKERRQ(ierr); 214 if (B_ierr) { 215 if (B_ierr < 0 ) { 216 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 217 } else if (B_ierr <= B_N) { 218 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 219 } else { 220 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 221 } 222 } 223 224 if (B_neigs > nmax) { 225 if (pcbddc->dbg_flag) { 226 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 227 } 228 eigs_start = B_neigs -nmax; 229 B_neigs = nmax; 230 } 231 232 nmin_s = PetscMin(nmin,B_N); 233 if (B_neigs < nmin_s) { 234 PetscBLASInt B_neigs2; 235 236 B_IU = B_N - B_neigs; 237 B_IL = B_N - nmin_s + 1; 238 if (pcbddc->dbg_flag) { 239 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 240 } 241 if (sub_schurs->is_hermitian) { 242 PetscInt j; 243 for (j=0;j<subset_size;j++) { 244 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 245 } 246 for (j=0;j<subset_size;j++) { 247 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 248 } 249 } else { 250 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 251 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 252 } 253 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 254 #if defined(PETSC_USE_COMPLEX) 255 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 256 #else 257 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 258 #endif 259 ierr = PetscFPTrapPop();CHKERRQ(ierr); 260 B_neigs += B_neigs2; 261 } 262 if (B_ierr) { 263 if (B_ierr < 0 ) { 264 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 265 } else if (B_ierr <= B_N) { 266 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 267 } else { 268 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 269 } 270 } 271 if (pcbddc->dbg_flag) { 272 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 273 for (j=0;j<B_neigs;j++) { 274 if (eigs[j] == 0.0) { 275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 276 } else { 277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 278 } 279 } 280 } 281 } else { 282 /* TODO */ 283 } 284 } 285 maxneigs = PetscMax(B_neigs,maxneigs); 286 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 287 if (B_neigs) { 288 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 289 290 if (pcbddc->dbg_flag > 1) { 291 PetscInt ii; 292 for (ii=0;ii<B_neigs;ii++) { 293 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 294 for (j=0;j<B_N;j++) { 295 #if defined(PETSC_USE_COMPLEX) 296 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 297 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 298 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 299 #else 300 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 301 #endif 302 } 303 } 304 } 305 #if 0 306 for (j=0;j<B_neigs;j++) { 307 PetscBLASInt Blas_N,Blas_one = 1.0; 308 PetscScalar norm; 309 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 310 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 311 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 312 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 313 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 314 } else { 315 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 316 } 317 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 318 } 319 #endif 320 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 321 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 322 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 323 cum++; 324 } 325 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 326 /* shift for next computation */ 327 cumarray += subset_size*subset_size; 328 } 329 if (pcbddc->dbg_flag) { 330 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 331 } 332 333 if (mss) { 334 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 335 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 336 /* destroy matrices (junk) */ 337 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 338 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 339 } 340 if (allocated_S_St) { 341 ierr = PetscFree2(S,St);CHKERRQ(ierr); 342 } 343 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 344 #if defined(PETSC_USE_COMPLEX) 345 ierr = PetscFree(rwork);CHKERRQ(ierr); 346 #endif 347 if (pcbddc->dbg_flag) { 348 PetscInt maxneigs_r; 349 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 350 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 351 } 352 PetscFunctionReturn(0); 353 } 354 355 #undef __FUNCT__ 356 #define __FUNCT__ "PCBDDCSetUpSolvers" 357 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 358 { 359 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 360 PetscScalar *coarse_submat_vals; 361 PetscErrorCode ierr; 362 363 PetscFunctionBegin; 364 /* Setup local scatters R_to_B and (optionally) R_to_D */ 365 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 366 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 367 368 /* Setup local neumann solver ksp_R */ 369 /* PCBDDCSetUpLocalScatters should be called first! */ 370 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 371 372 /* Change global null space passed in by the user if change of basis has been requested */ 373 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 374 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 375 } 376 377 /* 378 Setup local correction and local part of coarse basis. 379 Gives back the dense local part of the coarse matrix in column major ordering 380 */ 381 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 382 383 /* Compute total number of coarse nodes and setup coarse solver */ 384 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 385 386 /* free */ 387 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "PCBDDCResetCustomization" 393 PetscErrorCode PCBDDCResetCustomization(PC pc) 394 { 395 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 396 PetscErrorCode ierr; 397 398 PetscFunctionBegin; 399 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 400 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 401 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 402 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 403 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 404 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 405 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 406 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 407 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 408 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 409 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "PCBDDCResetTopography" 415 PetscErrorCode PCBDDCResetTopography(PC pc) 416 { 417 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 418 PetscErrorCode ierr; 419 420 PetscFunctionBegin; 421 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 422 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 423 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 424 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 425 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "PCBDDCResetSolvers" 431 PetscErrorCode PCBDDCResetSolvers(PC pc) 432 { 433 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 434 PetscScalar *array; 435 PetscErrorCode ierr; 436 437 PetscFunctionBegin; 438 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 439 if (pcbddc->coarse_phi_B) { 440 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 441 ierr = PetscFree(array);CHKERRQ(ierr); 442 } 443 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 444 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 445 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 446 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 447 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 449 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 450 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 451 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 452 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 453 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 454 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 455 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 456 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 457 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 458 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 459 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 460 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 461 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 462 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 463 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 464 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 465 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 466 PetscFunctionReturn(0); 467 } 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 471 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 472 { 473 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 474 PC_IS *pcis = (PC_IS*)pc->data; 475 VecType impVecType; 476 PetscInt n_constraints,n_R,old_size; 477 PetscErrorCode ierr; 478 479 PetscFunctionBegin; 480 if (!pcbddc->ConstraintMatrix) { 481 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 482 } 483 /* get sizes */ 484 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 485 n_R = pcis->n-pcbddc->n_vertices; 486 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 487 /* local work vectors (try to avoid unneeded work)*/ 488 /* R nodes */ 489 old_size = -1; 490 if (pcbddc->vec1_R) { 491 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 492 } 493 if (n_R != old_size) { 494 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 495 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 496 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 497 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 498 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 499 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 500 } 501 /* local primal dofs */ 502 old_size = -1; 503 if (pcbddc->vec1_P) { 504 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 505 } 506 if (pcbddc->local_primal_size != old_size) { 507 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 508 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 509 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 510 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 511 } 512 /* local explicit constraints */ 513 old_size = -1; 514 if (pcbddc->vec1_C) { 515 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 516 } 517 if (n_constraints && n_constraints != old_size) { 518 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 519 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 520 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 521 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 522 } 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "PCBDDCSetUpCorrection" 528 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 529 { 530 PetscErrorCode ierr; 531 /* pointers to pcis and pcbddc */ 532 PC_IS* pcis = (PC_IS*)pc->data; 533 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 534 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 535 /* submatrices of local problem */ 536 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 537 /* submatrices of local coarse problem */ 538 Mat S_VV,S_CV,S_VC,S_CC; 539 /* working matrices */ 540 Mat C_CR; 541 /* additional working stuff */ 542 PC pc_R; 543 Mat F; 544 PetscBool isLU,isCHOL,isILU; 545 546 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 547 PetscScalar *work; 548 PetscInt *idx_V_B; 549 PetscInt n,n_vertices,n_constraints; 550 PetscInt i,n_R,n_D,n_B; 551 PetscBool unsymmetric_check; 552 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 553 MatType impMatType; 554 /* some shortcuts to scalars */ 555 PetscScalar one=1.0,m_one=-1.0; 556 557 PetscFunctionBegin; 558 n_vertices = pcbddc->n_vertices; 559 n_constraints = pcbddc->local_primal_size-n_vertices; 560 /* Set Non-overlapping dimensions */ 561 n_B = pcis->n_B; 562 n_D = pcis->n - n_B; 563 n_R = pcis->n - n_vertices; 564 565 /* Set types for local objects needed by BDDC precondtioner */ 566 impMatType = MATSEQDENSE; 567 568 /* vertices in boundary numbering */ 569 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 570 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 571 if (i != n_vertices) { 572 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 573 } 574 575 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 576 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 577 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 578 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 579 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 580 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 581 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 582 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 583 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 584 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 585 586 unsymmetric_check = PETSC_FALSE; 587 /* allocate workspace */ 588 n = 0; 589 if (n_constraints) { 590 n += n_R*n_constraints; 591 } 592 if (n_vertices) { 593 n = PetscMax(2*n_R*n_vertices,n); 594 n = PetscMax((n_R+n_B)*n_vertices,n); 595 } 596 if (!pcbddc->symmetric_primal) { 597 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 598 unsymmetric_check = PETSC_TRUE; 599 } 600 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 601 602 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 603 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 604 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 605 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 606 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 607 if (isLU || isILU || isCHOL) { 608 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 609 } else if (sub_schurs->reuse_mumps) { 610 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 611 MatFactorType type; 612 613 F = reuse_mumps->F; 614 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 615 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 616 } else { 617 F = NULL; 618 } 619 620 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 621 if (n_constraints) { 622 Mat M1,M2,M3; 623 Mat auxmat; 624 IS is_aux; 625 PetscScalar *array,*array2; 626 627 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 628 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 629 630 /* Extract constraints on R nodes: C_{CR} */ 631 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 632 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 633 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 634 635 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 636 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 637 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 638 for (i=0;i<n_constraints;i++) { 639 const PetscScalar *row_cmat_values; 640 const PetscInt *row_cmat_indices; 641 PetscInt size_of_constraint,j; 642 643 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 644 for (j=0;j<size_of_constraint;j++) { 645 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 646 } 647 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 648 } 649 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 650 if (F) { 651 Mat B; 652 653 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 654 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 655 ierr = MatDestroy(&B);CHKERRQ(ierr); 656 } else { 657 PetscScalar *marr; 658 659 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 660 for (i=0;i<n_constraints;i++) { 661 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 662 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 663 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 664 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 665 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 666 } 667 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 668 } 669 if (!pcbddc->switch_static) { 670 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 671 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 672 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 673 for (i=0;i<n_constraints;i++) { 674 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 675 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 676 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 677 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 678 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 679 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 680 } 681 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 682 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 683 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 684 } else { 685 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 686 pcbddc->local_auxmat2 = local_auxmat2_R; 687 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 688 } 689 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 690 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 691 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 692 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 693 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 694 if (isCHOL) { 695 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 696 } else { 697 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 698 } 699 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 700 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 701 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 702 ierr = MatDestroy(&M2);CHKERRQ(ierr); 703 ierr = MatDestroy(&M3);CHKERRQ(ierr); 704 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 705 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 706 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 707 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 708 ierr = MatDestroy(&M1);CHKERRQ(ierr); 709 } 710 /* Get submatrices from subdomain matrix */ 711 if (n_vertices) { 712 IS is_aux; 713 714 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 715 IS tis; 716 717 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 718 ierr = ISSort(tis);CHKERRQ(ierr); 719 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 720 ierr = ISDestroy(&tis);CHKERRQ(ierr); 721 } else { 722 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 723 } 724 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 725 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 726 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 727 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 728 } 729 730 /* Matrix of coarse basis functions (local) */ 731 if (pcbddc->coarse_phi_B) { 732 PetscInt on_B,on_primal,on_D=n_D; 733 if (pcbddc->coarse_phi_D) { 734 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 735 } 736 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 737 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 738 PetscScalar *marray; 739 740 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 741 ierr = PetscFree(marray);CHKERRQ(ierr); 742 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 743 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 744 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 745 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 746 } 747 } 748 749 if (!pcbddc->coarse_phi_B) { 750 PetscScalar *marray; 751 752 n = n_B*pcbddc->local_primal_size; 753 if (pcbddc->switch_static || pcbddc->dbg_flag) { 754 n += n_D*pcbddc->local_primal_size; 755 } 756 if (!pcbddc->symmetric_primal) { 757 n *= 2; 758 } 759 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 760 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 761 n = n_B*pcbddc->local_primal_size; 762 if (pcbddc->switch_static || pcbddc->dbg_flag) { 763 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 764 n += n_D*pcbddc->local_primal_size; 765 } 766 if (!pcbddc->symmetric_primal) { 767 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 768 if (pcbddc->switch_static || pcbddc->dbg_flag) { 769 n = n_B*pcbddc->local_primal_size; 770 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 771 } 772 } else { 773 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 774 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 775 if (pcbddc->switch_static || pcbddc->dbg_flag) { 776 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 777 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 778 } 779 } 780 } 781 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 782 /* vertices */ 783 if (n_vertices) { 784 785 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 786 787 if (n_R) { 788 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 789 PetscBLASInt B_N,B_one = 1; 790 PetscScalar *x,*y; 791 PetscBool isseqaij; 792 793 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 794 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 795 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 796 if (F) { /* TODO could be optimized for symmetric problems */ 797 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 798 } else { 799 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 800 for (i=0;i<n_vertices;i++) { 801 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 802 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 803 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 804 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 805 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 806 } 807 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 808 } 809 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 810 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 811 if (n_constraints) { 812 Mat B; 813 814 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 815 for (i=0;i<n_vertices;i++) { 816 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 817 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 818 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 819 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 820 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 821 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 822 } 823 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 824 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 825 ierr = MatDestroy(&B);CHKERRQ(ierr); 826 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 827 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 828 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 829 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 830 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 831 ierr = MatDestroy(&B);CHKERRQ(ierr); 832 } 833 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 834 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 835 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 836 } 837 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 838 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 839 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 840 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 841 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 842 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 843 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 844 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 845 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 846 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 847 } else { 848 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 849 } 850 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 851 /* coarse basis functions */ 852 for (i=0;i<n_vertices;i++) { 853 PetscScalar *y; 854 855 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 856 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 857 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 858 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 y[n_B*i+idx_V_B[i]] = 1.0; 861 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 862 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 863 864 if (pcbddc->switch_static || pcbddc->dbg_flag) { 865 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 866 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 867 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 870 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 871 } 872 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 873 } 874 /* if n_R == 0 the object is not destroyed */ 875 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 876 } 877 878 if (n_constraints) { 879 Mat B; 880 881 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 882 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 883 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 884 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 885 if (n_vertices) { 886 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 887 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 888 } else { 889 Mat S_VCt; 890 891 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 892 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 893 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 894 } 895 } 896 ierr = MatDestroy(&B);CHKERRQ(ierr); 897 /* coarse basis functions */ 898 for (i=0;i<n_constraints;i++) { 899 PetscScalar *y; 900 901 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 902 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 903 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 904 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 906 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 907 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 908 if (pcbddc->switch_static || pcbddc->dbg_flag) { 909 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 910 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 911 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 912 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 913 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 914 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 915 } 916 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 917 } 918 } 919 if (n_constraints) { 920 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 921 } 922 923 /* compute other basis functions for non-symmetric problems */ 924 if (!pcbddc->symmetric_primal) { 925 926 if (n_constraints) { 927 Mat S_CCT,B_C; 928 929 /* this is a lazy thing */ 930 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 931 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 932 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 933 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 934 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 935 if (n_vertices) { 936 Mat B_V,S_VCT; 937 938 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 939 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 940 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 941 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 942 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 943 } 944 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 945 } else { /* if there are no constraints, reset work */ 946 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 947 } 948 if (n_vertices && n_R) { 949 Mat A_VRT; 950 PetscScalar *marray; 951 PetscBLASInt B_N,B_one = 1; 952 953 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 954 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 955 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 956 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 957 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 958 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 959 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 960 } 961 962 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 963 for (i=0;i<pcbddc->local_primal_size;i++) { 964 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 965 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 966 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 967 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 968 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 969 } 970 } else { 971 for (i=0;i<pcbddc->local_primal_size;i++) { 972 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 973 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 974 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 975 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 976 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 977 } 978 } 979 /* coarse basis functions */ 980 for (i=0;i<pcbddc->local_primal_size;i++) { 981 PetscScalar *y; 982 983 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 984 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 985 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 986 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 987 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 988 if (i<n_vertices) { 989 y[n_B*i+idx_V_B[i]] = 1.0; 990 } 991 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 992 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 993 994 if (pcbddc->switch_static || pcbddc->dbg_flag) { 995 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 996 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 997 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 998 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 999 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1000 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1001 } 1002 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1003 } 1004 } 1005 /* free memory */ 1006 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1007 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1008 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1009 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1010 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1011 ierr = PetscFree(work);CHKERRQ(ierr); 1012 if (n_vertices) { 1013 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1014 } 1015 if (n_constraints) { 1016 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1017 } 1018 /* Checking coarse_sub_mat and coarse basis functios */ 1019 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1020 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1021 if (pcbddc->dbg_flag) { 1022 Mat coarse_sub_mat; 1023 Mat AUXMAT,TM1,TM2,TM3,TM4; 1024 Mat coarse_phi_D,coarse_phi_B; 1025 Mat coarse_psi_D,coarse_psi_B; 1026 Mat A_II,A_BB,A_IB,A_BI; 1027 Mat C_B,CPHI; 1028 IS is_dummy; 1029 Vec mones; 1030 MatType checkmattype=MATSEQAIJ; 1031 PetscReal real_value; 1032 1033 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1034 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1035 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1036 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1037 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1038 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1039 if (unsymmetric_check) { 1040 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1041 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1042 } 1043 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1044 1045 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1046 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1047 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1048 if (unsymmetric_check) { 1049 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1050 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1051 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1052 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1053 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1054 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1055 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1056 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1057 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1058 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1059 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1060 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1061 } else { 1062 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1063 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1064 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1065 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1066 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1067 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1068 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1069 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1070 } 1071 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1072 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1073 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1074 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1075 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1076 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1077 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1078 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1079 1080 /* check constraints */ 1081 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1082 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1083 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1084 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1085 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1086 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1087 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1088 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1089 if (unsymmetric_check) { 1090 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1091 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1092 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1093 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1094 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1095 } 1096 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1097 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1098 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1099 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1100 1101 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1102 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1103 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1104 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1105 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1106 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1107 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1108 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1109 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1110 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1111 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1112 if (unsymmetric_check) { 1113 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1114 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1115 } 1116 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1117 } 1118 /* get back data */ 1119 *coarse_submat_vals_n = coarse_submat_vals; 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1125 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1126 { 1127 Mat *work_mat; 1128 IS isrow_s,iscol_s; 1129 PetscBool rsorted,csorted; 1130 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1131 PetscErrorCode ierr; 1132 1133 PetscFunctionBegin; 1134 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1135 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1136 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1137 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1138 1139 if (!rsorted) { 1140 const PetscInt *idxs; 1141 PetscInt *idxs_sorted,i; 1142 1143 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1144 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1145 for (i=0;i<rsize;i++) { 1146 idxs_perm_r[i] = i; 1147 } 1148 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1149 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1150 for (i=0;i<rsize;i++) { 1151 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1152 } 1153 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1154 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1155 } else { 1156 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1157 isrow_s = isrow; 1158 } 1159 1160 if (!csorted) { 1161 if (isrow == iscol) { 1162 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1163 iscol_s = isrow_s; 1164 } else { 1165 const PetscInt *idxs; 1166 PetscInt *idxs_sorted,i; 1167 1168 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1169 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1170 for (i=0;i<csize;i++) { 1171 idxs_perm_c[i] = i; 1172 } 1173 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1174 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1175 for (i=0;i<csize;i++) { 1176 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1177 } 1178 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1179 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1180 } 1181 } else { 1182 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1183 iscol_s = iscol; 1184 } 1185 1186 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1187 1188 if (!rsorted || !csorted) { 1189 Mat new_mat; 1190 IS is_perm_r,is_perm_c; 1191 1192 if (!rsorted) { 1193 PetscInt *idxs_r,i; 1194 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1195 for (i=0;i<rsize;i++) { 1196 idxs_r[idxs_perm_r[i]] = i; 1197 } 1198 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1199 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1200 } else { 1201 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1202 } 1203 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1204 1205 if (!csorted) { 1206 if (isrow_s == iscol_s) { 1207 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1208 is_perm_c = is_perm_r; 1209 } else { 1210 PetscInt *idxs_c,i; 1211 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1212 for (i=0;i<csize;i++) { 1213 idxs_c[idxs_perm_c[i]] = i; 1214 } 1215 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1216 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1217 } 1218 } else { 1219 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1220 } 1221 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1222 1223 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1224 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1225 work_mat[0] = new_mat; 1226 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1227 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1228 } 1229 1230 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1231 *B = work_mat[0]; 1232 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1233 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1234 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 #undef __FUNCT__ 1239 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1240 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1241 { 1242 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1243 PC_IS* pcis = (PC_IS*)pc->data; 1244 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1245 Mat new_mat; 1246 IS is_local,is_global; 1247 PetscInt local_size; 1248 PetscBool isseqaij; 1249 PetscErrorCode ierr; 1250 1251 PetscFunctionBegin; 1252 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1253 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1254 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1255 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr); 1256 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1257 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1258 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1259 1260 /* check */ 1261 if (pcbddc->dbg_flag) { 1262 Vec x,x_change; 1263 PetscReal error; 1264 1265 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1266 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1267 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1268 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1270 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1271 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1272 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1273 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1274 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1275 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1276 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1277 ierr = VecDestroy(&x);CHKERRQ(ierr); 1278 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1279 } 1280 1281 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1282 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1283 if (isseqaij) { 1284 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1285 } else { 1286 Mat work_mat; 1287 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1288 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1289 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1290 } 1291 if (matis->A->symmetric_set) { 1292 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1293 #if !defined(PETSC_USE_COMPLEX) 1294 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1295 #endif 1296 } 1297 /* 1298 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1299 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1300 */ 1301 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1307 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1308 { 1309 PC_IS* pcis = (PC_IS*)(pc->data); 1310 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1311 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1312 PetscInt *idx_R_local=NULL; 1313 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1314 PetscInt vbs,bs; 1315 PetscBT bitmask=NULL; 1316 PetscErrorCode ierr; 1317 1318 PetscFunctionBegin; 1319 /* 1320 No need to setup local scatters if 1321 - primal space is unchanged 1322 AND 1323 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1324 AND 1325 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1326 */ 1327 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1328 PetscFunctionReturn(0); 1329 } 1330 /* destroy old objects */ 1331 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1332 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1333 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1334 /* Set Non-overlapping dimensions */ 1335 n_B = pcis->n_B; 1336 n_D = pcis->n - n_B; 1337 n_vertices = pcbddc->n_vertices; 1338 1339 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1340 1341 /* create auxiliary bitmask and allocate workspace */ 1342 if (!sub_schurs->reuse_mumps) { 1343 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1344 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1345 for (i=0;i<n_vertices;i++) { 1346 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1347 } 1348 1349 for (i=0, n_R=0; i<pcis->n; i++) { 1350 if (!PetscBTLookup(bitmask,i)) { 1351 idx_R_local[n_R++] = i; 1352 } 1353 } 1354 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1355 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1356 1357 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1358 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1359 } 1360 1361 /* Block code */ 1362 vbs = 1; 1363 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1364 if (bs>1 && !(n_vertices%bs)) { 1365 PetscBool is_blocked = PETSC_TRUE; 1366 PetscInt *vary; 1367 if (!sub_schurs->reuse_mumps) { 1368 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1369 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1370 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1371 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1372 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1373 for (i=0; i<pcis->n/bs; i++) { 1374 if (vary[i]!=0 && vary[i]!=bs) { 1375 is_blocked = PETSC_FALSE; 1376 break; 1377 } 1378 } 1379 ierr = PetscFree(vary);CHKERRQ(ierr); 1380 } else { 1381 /* Verify directly the R set */ 1382 for (i=0; i<n_R/bs; i++) { 1383 PetscInt j,node=idx_R_local[bs*i]; 1384 for (j=1; j<bs; j++) { 1385 if (node != idx_R_local[bs*i+j]-j) { 1386 is_blocked = PETSC_FALSE; 1387 break; 1388 } 1389 } 1390 } 1391 } 1392 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1393 vbs = bs; 1394 for (i=0;i<n_R/vbs;i++) { 1395 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1396 } 1397 } 1398 } 1399 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1400 if (sub_schurs->reuse_mumps) { 1401 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1402 1403 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1404 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1405 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1406 reuse_mumps->is_R = pcbddc->is_R_local; 1407 } else { 1408 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1409 } 1410 1411 /* print some info if requested */ 1412 if (pcbddc->dbg_flag) { 1413 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1414 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1415 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1416 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1418 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1419 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1420 } 1421 1422 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1423 if (!sub_schurs->reuse_mumps) { 1424 IS is_aux1,is_aux2; 1425 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1426 1427 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1428 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1429 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1430 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1431 for (i=0; i<n_D; i++) { 1432 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1433 } 1434 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1435 for (i=0, j=0; i<n_R; i++) { 1436 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1437 aux_array1[j++] = i; 1438 } 1439 } 1440 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1441 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1442 for (i=0, j=0; i<n_B; i++) { 1443 if (!PetscBTLookup(bitmask,is_indices[i])) { 1444 aux_array2[j++] = i; 1445 } 1446 } 1447 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1448 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1449 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1450 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1451 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1452 1453 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1454 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1455 for (i=0, j=0; i<n_R; i++) { 1456 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1457 aux_array1[j++] = i; 1458 } 1459 } 1460 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1461 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1462 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1463 } 1464 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1465 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1466 } else { 1467 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1468 IS tis; 1469 PetscInt schur_size; 1470 1471 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1472 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1473 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1474 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1475 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1476 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1477 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1478 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1479 } 1480 } 1481 PetscFunctionReturn(0); 1482 } 1483 1484 1485 #undef __FUNCT__ 1486 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1487 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1488 { 1489 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1490 PC_IS *pcis = (PC_IS*)pc->data; 1491 PC pc_temp; 1492 Mat A_RR; 1493 MatReuse reuse; 1494 PetscScalar m_one = -1.0; 1495 PetscReal value; 1496 PetscInt n_D,n_R; 1497 PetscBool use_exact,use_exact_reduced,issbaij; 1498 PetscErrorCode ierr; 1499 /* prefixes stuff */ 1500 char dir_prefix[256],neu_prefix[256],str_level[16]; 1501 size_t len; 1502 1503 PetscFunctionBegin; 1504 1505 /* compute prefixes */ 1506 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1507 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1508 if (!pcbddc->current_level) { 1509 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1510 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1511 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1512 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1513 } else { 1514 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1515 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1516 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1517 len -= 15; /* remove "pc_bddc_coarse_" */ 1518 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1519 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1520 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1521 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1522 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1523 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1524 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1525 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1526 } 1527 1528 /* DIRICHLET PROBLEM */ 1529 if (dirichlet) { 1530 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1531 if (pcbddc->local_mat->symmetric_set) { 1532 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1533 } 1534 /* Matrix for Dirichlet problem is pcis->A_II */ 1535 n_D = pcis->n - pcis->n_B; 1536 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1537 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1538 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1539 /* default */ 1540 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1541 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1542 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1543 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1544 if (issbaij) { 1545 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1546 } else { 1547 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1548 } 1549 /* Allow user's customization */ 1550 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1551 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1552 } 1553 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1554 if (sub_schurs->reuse_mumps) { 1555 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1556 1557 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1558 } 1559 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1560 if (!n_D) { 1561 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1562 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1563 } 1564 /* Set Up KSP for Dirichlet problem of BDDC */ 1565 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1566 /* set ksp_D into pcis data */ 1567 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1568 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1569 pcis->ksp_D = pcbddc->ksp_D; 1570 } 1571 1572 /* NEUMANN PROBLEM */ 1573 A_RR = 0; 1574 if (neumann) { 1575 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1576 PetscInt ibs,mbs; 1577 PetscBool issbaij; 1578 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1579 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1580 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1581 if (pcbddc->ksp_R) { /* already created ksp */ 1582 PetscInt nn_R; 1583 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1584 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1585 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1586 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1587 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1588 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1589 reuse = MAT_INITIAL_MATRIX; 1590 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1591 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1592 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1593 reuse = MAT_INITIAL_MATRIX; 1594 } else { /* safe to reuse the matrix */ 1595 reuse = MAT_REUSE_MATRIX; 1596 } 1597 } 1598 /* last check */ 1599 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1600 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1601 reuse = MAT_INITIAL_MATRIX; 1602 } 1603 } else { /* first time, so we need to create the matrix */ 1604 reuse = MAT_INITIAL_MATRIX; 1605 } 1606 /* extract A_RR */ 1607 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1608 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1609 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1610 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1611 if (matis->A == pcbddc->local_mat) { 1612 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1613 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1614 } else { 1615 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1616 } 1617 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1618 if (matis->A == pcbddc->local_mat) { 1619 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1620 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1621 } else { 1622 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1623 } 1624 } 1625 if (!sub_schurs->reuse_mumps) { 1626 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1627 if (pcbddc->local_mat->symmetric_set) { 1628 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1629 } 1630 } else { 1631 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1632 1633 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1634 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1635 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1636 } 1637 if (!pcbddc->ksp_R) { /* create object if not present */ 1638 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1639 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1640 /* default */ 1641 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1642 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1643 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1644 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1645 if (issbaij) { 1646 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1647 } else { 1648 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1649 } 1650 /* Allow user's customization */ 1651 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1652 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1653 } 1654 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1655 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1656 if (!n_R) { 1657 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1658 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1659 } 1660 /* Reuse MUMPS solver if it is present */ 1661 if (sub_schurs->reuse_mumps) { 1662 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1663 1664 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1665 } 1666 /* Set Up KSP for Neumann problem of BDDC */ 1667 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1668 } 1669 /* free Neumann problem's matrix */ 1670 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1671 1672 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1673 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1674 if (pcbddc->dbg_flag) { 1675 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1676 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1677 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1678 } 1679 if (dirichlet) { /* Dirichlet */ 1680 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1681 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1682 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1683 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1684 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1685 /* need to be adapted? */ 1686 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1687 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1688 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1689 /* print info */ 1690 if (pcbddc->dbg_flag) { 1691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1692 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1693 } 1694 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1695 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1696 } 1697 } 1698 if (neumann) { /* Neumann */ 1699 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1700 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1701 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1702 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1703 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1704 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1705 /* need to be adapted? */ 1706 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1707 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1708 /* print info */ 1709 if (pcbddc->dbg_flag) { 1710 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1711 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1712 } 1713 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1714 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1715 } 1716 } 1717 } 1718 PetscFunctionReturn(0); 1719 } 1720 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1723 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1724 { 1725 PetscErrorCode ierr; 1726 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1727 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1728 1729 PetscFunctionBegin; 1730 if (!sub_schurs->reuse_mumps) { 1731 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1732 } 1733 if (!pcbddc->switch_static) { 1734 if (applytranspose && pcbddc->local_auxmat1) { 1735 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1736 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1737 } 1738 if (!sub_schurs->reuse_mumps) { 1739 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1740 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1741 } else { 1742 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1743 1744 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1745 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1746 } 1747 } else { 1748 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1749 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1750 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1751 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1752 if (applytranspose && pcbddc->local_auxmat1) { 1753 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1754 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1755 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1756 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1757 } 1758 } 1759 if (!sub_schurs->reuse_mumps) { 1760 if (applytranspose) { 1761 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1762 } else { 1763 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1764 } 1765 #if defined(PETSC_HAVE_MUMPS) 1766 } else { 1767 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1768 1769 if (applytranspose) { 1770 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1771 } else { 1772 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1773 } 1774 #endif 1775 } 1776 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1777 if (!pcbddc->switch_static) { 1778 if (!sub_schurs->reuse_mumps) { 1779 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1780 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1781 } else { 1782 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1783 1784 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1785 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1786 } 1787 if (!applytranspose && pcbddc->local_auxmat1) { 1788 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1789 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1790 } 1791 } else { 1792 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1793 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1794 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1795 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1796 if (!applytranspose && pcbddc->local_auxmat1) { 1797 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1798 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1799 } 1800 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1801 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1802 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1803 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1804 } 1805 PetscFunctionReturn(0); 1806 } 1807 1808 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1811 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1812 { 1813 PetscErrorCode ierr; 1814 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1815 PC_IS* pcis = (PC_IS*) (pc->data); 1816 const PetscScalar zero = 0.0; 1817 1818 PetscFunctionBegin; 1819 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1820 if (applytranspose) { 1821 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1822 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1823 } else { 1824 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1825 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1826 } 1827 /* start communications from local primal nodes to rhs of coarse solver */ 1828 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1829 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1830 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1831 1832 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1833 /* TODO remove null space when doing multilevel */ 1834 if (pcbddc->coarse_ksp) { 1835 Vec rhs,sol; 1836 1837 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1838 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1839 if (applytranspose) { 1840 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1841 } else { 1842 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1843 } 1844 } 1845 1846 /* Local solution on R nodes */ 1847 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1848 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1849 } 1850 1851 /* communications from coarse sol to local primal nodes */ 1852 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1853 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1854 1855 /* Sum contributions from two levels */ 1856 if (applytranspose) { 1857 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1858 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1859 } else { 1860 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1861 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1862 } 1863 PetscFunctionReturn(0); 1864 } 1865 1866 #undef __FUNCT__ 1867 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1868 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1869 { 1870 PetscErrorCode ierr; 1871 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1872 PetscScalar *array; 1873 Vec from,to; 1874 1875 PetscFunctionBegin; 1876 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1877 from = pcbddc->coarse_vec; 1878 to = pcbddc->vec1_P; 1879 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1880 Vec tvec; 1881 1882 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1883 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1884 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1885 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1886 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1887 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1888 } 1889 } else { /* from local to global -> put data in coarse right hand side */ 1890 from = pcbddc->vec1_P; 1891 to = pcbddc->coarse_vec; 1892 } 1893 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1894 PetscFunctionReturn(0); 1895 } 1896 1897 #undef __FUNCT__ 1898 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1899 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1900 { 1901 PetscErrorCode ierr; 1902 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1903 PetscScalar *array; 1904 Vec from,to; 1905 1906 PetscFunctionBegin; 1907 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1908 from = pcbddc->coarse_vec; 1909 to = pcbddc->vec1_P; 1910 } else { /* from local to global -> put data in coarse right hand side */ 1911 from = pcbddc->vec1_P; 1912 to = pcbddc->coarse_vec; 1913 } 1914 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1915 if (smode == SCATTER_FORWARD) { 1916 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1917 Vec tvec; 1918 1919 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1920 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1921 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1922 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1923 } 1924 } else { 1925 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1926 ierr = VecResetArray(from);CHKERRQ(ierr); 1927 } 1928 } 1929 PetscFunctionReturn(0); 1930 } 1931 1932 /* uncomment for testing purposes */ 1933 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1934 #undef __FUNCT__ 1935 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1936 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1937 { 1938 PetscErrorCode ierr; 1939 PC_IS* pcis = (PC_IS*)(pc->data); 1940 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1941 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1942 /* one and zero */ 1943 PetscScalar one=1.0,zero=0.0; 1944 /* space to store constraints and their local indices */ 1945 PetscScalar *constraints_data; 1946 PetscInt *constraints_idxs,*constraints_idxs_B; 1947 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1948 PetscInt *constraints_n; 1949 /* iterators */ 1950 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1951 /* BLAS integers */ 1952 PetscBLASInt lwork,lierr; 1953 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1954 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1955 /* reuse */ 1956 PetscInt olocal_primal_size,olocal_primal_size_cc; 1957 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1958 /* change of basis */ 1959 PetscBool qr_needed; 1960 PetscBT change_basis,qr_needed_idx; 1961 /* auxiliary stuff */ 1962 PetscInt *nnz,*is_indices; 1963 PetscInt ncc; 1964 /* some quantities */ 1965 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1966 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1967 1968 PetscFunctionBegin; 1969 /* Destroy Mat objects computed previously */ 1970 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1971 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1972 /* save info on constraints from previous setup (if any) */ 1973 olocal_primal_size = pcbddc->local_primal_size; 1974 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1975 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1976 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1977 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1978 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1979 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1980 1981 /* print some info */ 1982 if (pcbddc->dbg_flag) { 1983 IS vertices; 1984 PetscInt nv,nedges,nfaces; 1985 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1986 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1987 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1988 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1989 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1990 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1991 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1992 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1993 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1994 } 1995 1996 if (!pcbddc->adaptive_selection) { 1997 IS ISForVertices,*ISForFaces,*ISForEdges; 1998 MatNullSpace nearnullsp; 1999 const Vec *nearnullvecs; 2000 Vec *localnearnullsp; 2001 PetscScalar *array; 2002 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2003 PetscBool nnsp_has_cnst; 2004 /* LAPACK working arrays for SVD or POD */ 2005 PetscBool skip_lapack,boolforchange; 2006 PetscScalar *work; 2007 PetscReal *singular_vals; 2008 #if defined(PETSC_USE_COMPLEX) 2009 PetscReal *rwork; 2010 #endif 2011 #if defined(PETSC_MISSING_LAPACK_GESVD) 2012 PetscScalar *temp_basis,*correlation_mat; 2013 #else 2014 PetscBLASInt dummy_int=1; 2015 PetscScalar dummy_scalar=1.; 2016 #endif 2017 2018 /* Get index sets for faces, edges and vertices from graph */ 2019 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2020 /* free unneeded index sets */ 2021 if (!pcbddc->use_vertices) { 2022 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2023 } 2024 if (!pcbddc->use_edges) { 2025 for (i=0;i<n_ISForEdges;i++) { 2026 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2027 } 2028 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2029 n_ISForEdges = 0; 2030 } 2031 if (!pcbddc->use_faces) { 2032 for (i=0;i<n_ISForFaces;i++) { 2033 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2034 } 2035 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2036 n_ISForFaces = 0; 2037 } 2038 2039 #if defined(PETSC_USE_DEBUG) 2040 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2041 Also use_change_of_basis should be consistent among processors */ 2042 if (pcbddc->NullSpace) { 2043 PetscBool tbool[2],gbool[2]; 2044 2045 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2046 pcbddc->use_change_of_basis = PETSC_TRUE; 2047 if (!ISForEdges) { 2048 pcbddc->use_change_on_faces = PETSC_TRUE; 2049 } 2050 } 2051 tbool[0] = pcbddc->use_change_of_basis; 2052 tbool[1] = pcbddc->use_change_on_faces; 2053 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2054 pcbddc->use_change_of_basis = gbool[0]; 2055 pcbddc->use_change_on_faces = gbool[1]; 2056 } 2057 #endif 2058 2059 /* check if near null space is attached to global mat */ 2060 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2061 if (nearnullsp) { 2062 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2063 /* remove any stored info */ 2064 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2065 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2066 /* store information for BDDC solver reuse */ 2067 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2068 pcbddc->onearnullspace = nearnullsp; 2069 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2070 for (i=0;i<nnsp_size;i++) { 2071 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2072 } 2073 } else { /* if near null space is not provided BDDC uses constants by default */ 2074 nnsp_size = 0; 2075 nnsp_has_cnst = PETSC_TRUE; 2076 } 2077 /* get max number of constraints on a single cc */ 2078 max_constraints = nnsp_size; 2079 if (nnsp_has_cnst) max_constraints++; 2080 2081 /* 2082 Evaluate maximum storage size needed by the procedure 2083 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2084 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2085 There can be multiple constraints per connected component 2086 */ 2087 n_vertices = 0; 2088 if (ISForVertices) { 2089 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2090 } 2091 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2092 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2093 2094 total_counts = n_ISForFaces+n_ISForEdges; 2095 total_counts *= max_constraints; 2096 total_counts += n_vertices; 2097 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2098 2099 total_counts = 0; 2100 max_size_of_constraint = 0; 2101 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2102 IS used_is; 2103 if (i<n_ISForEdges) { 2104 used_is = ISForEdges[i]; 2105 } else { 2106 used_is = ISForFaces[i-n_ISForEdges]; 2107 } 2108 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2109 total_counts += j; 2110 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2111 } 2112 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2113 2114 /* get local part of global near null space vectors */ 2115 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2116 for (k=0;k<nnsp_size;k++) { 2117 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2118 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2119 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2120 } 2121 2122 /* whether or not to skip lapack calls */ 2123 skip_lapack = PETSC_TRUE; 2124 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2125 2126 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2127 if (!skip_lapack) { 2128 PetscScalar temp_work; 2129 2130 #if defined(PETSC_MISSING_LAPACK_GESVD) 2131 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2132 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2133 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2134 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2135 #if defined(PETSC_USE_COMPLEX) 2136 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2137 #endif 2138 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2139 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2140 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2141 lwork = -1; 2142 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2143 #if !defined(PETSC_USE_COMPLEX) 2144 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2145 #else 2146 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2147 #endif 2148 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2149 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2150 #else /* on missing GESVD */ 2151 /* SVD */ 2152 PetscInt max_n,min_n; 2153 max_n = max_size_of_constraint; 2154 min_n = max_constraints; 2155 if (max_size_of_constraint < max_constraints) { 2156 min_n = max_size_of_constraint; 2157 max_n = max_constraints; 2158 } 2159 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2160 #if defined(PETSC_USE_COMPLEX) 2161 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2162 #endif 2163 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2164 lwork = -1; 2165 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2166 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2167 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2168 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2169 #if !defined(PETSC_USE_COMPLEX) 2170 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2171 #else 2172 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2173 #endif 2174 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2175 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2176 #endif /* on missing GESVD */ 2177 /* Allocate optimal workspace */ 2178 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2179 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2180 } 2181 /* Now we can loop on constraining sets */ 2182 total_counts = 0; 2183 constraints_idxs_ptr[0] = 0; 2184 constraints_data_ptr[0] = 0; 2185 /* vertices */ 2186 if (n_vertices) { 2187 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2188 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2189 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2190 for (i=0;i<n_vertices;i++) { 2191 constraints_n[total_counts] = 1; 2192 constraints_data[total_counts] = 1.0; 2193 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2194 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2195 total_counts++; 2196 } 2197 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2198 PetscBool used_vertex; 2199 for (i=0;i<n_vertices;i++) { 2200 used_vertex = PETSC_FALSE; 2201 k = 0; 2202 while (!used_vertex && k<nnsp_size) { 2203 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2204 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2205 constraints_n[total_counts] = 1; 2206 constraints_idxs[total_counts] = is_indices[i]; 2207 constraints_data[total_counts] = 1.0; 2208 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2209 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2210 total_counts++; 2211 used_vertex = PETSC_TRUE; 2212 } 2213 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2214 k++; 2215 } 2216 } 2217 } 2218 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2219 n_vertices = total_counts; 2220 } 2221 2222 /* edges and faces */ 2223 total_counts_cc = total_counts; 2224 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2225 IS used_is; 2226 PetscBool idxs_copied = PETSC_FALSE; 2227 2228 if (ncc<n_ISForEdges) { 2229 used_is = ISForEdges[ncc]; 2230 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2231 } else { 2232 used_is = ISForFaces[ncc-n_ISForEdges]; 2233 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2234 } 2235 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2236 2237 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2238 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2239 /* change of basis should not be performed on local periodic nodes */ 2240 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2241 if (nnsp_has_cnst) { 2242 PetscScalar quad_value; 2243 2244 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2245 idxs_copied = PETSC_TRUE; 2246 2247 if (!pcbddc->use_nnsp_true) { 2248 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2249 } else { 2250 quad_value = 1.0; 2251 } 2252 for (j=0;j<size_of_constraint;j++) { 2253 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2254 } 2255 temp_constraints++; 2256 total_counts++; 2257 } 2258 for (k=0;k<nnsp_size;k++) { 2259 PetscReal real_value; 2260 PetscScalar *ptr_to_data; 2261 2262 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2263 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2264 for (j=0;j<size_of_constraint;j++) { 2265 ptr_to_data[j] = array[is_indices[j]]; 2266 } 2267 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2268 /* check if array is null on the connected component */ 2269 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2270 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2271 if (real_value > 0.0) { /* keep indices and values */ 2272 temp_constraints++; 2273 total_counts++; 2274 if (!idxs_copied) { 2275 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2276 idxs_copied = PETSC_TRUE; 2277 } 2278 } 2279 } 2280 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2281 valid_constraints = temp_constraints; 2282 if (!pcbddc->use_nnsp_true && temp_constraints) { 2283 if (temp_constraints == 1) { /* just normalize the constraint */ 2284 PetscScalar norm,*ptr_to_data; 2285 2286 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2287 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2288 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2289 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2290 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2291 } else { /* perform SVD */ 2292 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2293 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2294 2295 #if defined(PETSC_MISSING_LAPACK_GESVD) 2296 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2297 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2298 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2299 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2300 from that computed using LAPACKgesvd 2301 -> This is due to a different computation of eigenvectors in LAPACKheev 2302 -> The quality of the POD-computed basis will be the same */ 2303 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2304 /* Store upper triangular part of correlation matrix */ 2305 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2306 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2307 for (j=0;j<temp_constraints;j++) { 2308 for (k=0;k<j+1;k++) { 2309 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2310 } 2311 } 2312 /* compute eigenvalues and eigenvectors of correlation matrix */ 2313 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2314 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2315 #if !defined(PETSC_USE_COMPLEX) 2316 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2317 #else 2318 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2319 #endif 2320 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2321 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2322 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2323 j = 0; 2324 while (j < temp_constraints && singular_vals[j] < tol) j++; 2325 total_counts = total_counts-j; 2326 valid_constraints = temp_constraints-j; 2327 /* scale and copy POD basis into used quadrature memory */ 2328 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2329 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2330 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2331 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2332 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2333 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2334 if (j<temp_constraints) { 2335 PetscInt ii; 2336 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2337 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2338 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2339 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2340 for (k=0;k<temp_constraints-j;k++) { 2341 for (ii=0;ii<size_of_constraint;ii++) { 2342 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2343 } 2344 } 2345 } 2346 #else /* on missing GESVD */ 2347 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2348 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2349 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2350 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2351 #if !defined(PETSC_USE_COMPLEX) 2352 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2353 #else 2354 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2355 #endif 2356 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2357 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2358 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2359 k = temp_constraints; 2360 if (k > size_of_constraint) k = size_of_constraint; 2361 j = 0; 2362 while (j < k && singular_vals[k-j-1] < tol) j++; 2363 valid_constraints = k-j; 2364 total_counts = total_counts-temp_constraints+valid_constraints; 2365 #endif /* on missing GESVD */ 2366 } 2367 } 2368 /* update pointers information */ 2369 if (valid_constraints) { 2370 constraints_n[total_counts_cc] = valid_constraints; 2371 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2372 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2373 /* set change_of_basis flag */ 2374 if (boolforchange) { 2375 PetscBTSet(change_basis,total_counts_cc); 2376 } 2377 total_counts_cc++; 2378 } 2379 } 2380 /* free workspace */ 2381 if (!skip_lapack) { 2382 ierr = PetscFree(work);CHKERRQ(ierr); 2383 #if defined(PETSC_USE_COMPLEX) 2384 ierr = PetscFree(rwork);CHKERRQ(ierr); 2385 #endif 2386 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2387 #if defined(PETSC_MISSING_LAPACK_GESVD) 2388 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2389 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2390 #endif 2391 } 2392 for (k=0;k<nnsp_size;k++) { 2393 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2394 } 2395 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2396 /* free index sets of faces, edges and vertices */ 2397 for (i=0;i<n_ISForFaces;i++) { 2398 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2399 } 2400 if (n_ISForFaces) { 2401 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2402 } 2403 for (i=0;i<n_ISForEdges;i++) { 2404 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2405 } 2406 if (n_ISForEdges) { 2407 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2408 } 2409 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2410 } else { 2411 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2412 2413 total_counts = 0; 2414 n_vertices = 0; 2415 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2416 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2417 } 2418 max_constraints = 0; 2419 total_counts_cc = 0; 2420 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2421 total_counts += pcbddc->adaptive_constraints_n[i]; 2422 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2423 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2424 } 2425 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2426 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2427 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2428 constraints_data = pcbddc->adaptive_constraints_data; 2429 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2430 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2431 total_counts_cc = 0; 2432 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2433 if (pcbddc->adaptive_constraints_n[i]) { 2434 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2435 } 2436 } 2437 #if 0 2438 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2439 for (i=0;i<total_counts_cc;i++) { 2440 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2441 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2442 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2443 printf(" %d",constraints_idxs[j]); 2444 } 2445 printf("\n"); 2446 printf("number of cc: %d\n",constraints_n[i]); 2447 } 2448 for (i=0;i<n_vertices;i++) { 2449 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2450 } 2451 for (i=0;i<sub_schurs->n_subs;i++) { 2452 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2453 } 2454 #endif 2455 2456 max_size_of_constraint = 0; 2457 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2458 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2459 /* Change of basis */ 2460 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2461 if (pcbddc->use_change_of_basis) { 2462 for (i=0;i<sub_schurs->n_subs;i++) { 2463 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2464 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2465 } 2466 } 2467 } 2468 } 2469 pcbddc->local_primal_size = total_counts; 2470 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2471 2472 /* map constraints_idxs in boundary numbering */ 2473 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2474 if (i != constraints_idxs_ptr[total_counts_cc]) { 2475 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2476 } 2477 2478 /* Create constraint matrix */ 2479 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2480 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2481 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2482 2483 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2484 /* determine if a QR strategy is needed for change of basis */ 2485 qr_needed = PETSC_FALSE; 2486 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2487 total_primal_vertices=0; 2488 pcbddc->local_primal_size_cc = 0; 2489 for (i=0;i<total_counts_cc;i++) { 2490 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2491 if (size_of_constraint == 1) { 2492 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2493 pcbddc->local_primal_size_cc += 1; 2494 } else if (PetscBTLookup(change_basis,i)) { 2495 for (k=0;k<constraints_n[i];k++) { 2496 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2497 } 2498 pcbddc->local_primal_size_cc += constraints_n[i]; 2499 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2500 PetscBTSet(qr_needed_idx,i); 2501 qr_needed = PETSC_TRUE; 2502 } 2503 } else { 2504 pcbddc->local_primal_size_cc += 1; 2505 } 2506 } 2507 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2508 pcbddc->n_vertices = total_primal_vertices; 2509 /* permute indices in order to have a sorted set of vertices */ 2510 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2511 2512 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2513 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2514 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2515 2516 /* nonzero structure of constraint matrix */ 2517 /* and get reference dof for local constraints */ 2518 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2519 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2520 2521 j = total_primal_vertices; 2522 total_counts = total_primal_vertices; 2523 cum = total_primal_vertices; 2524 for (i=n_vertices;i<total_counts_cc;i++) { 2525 if (!PetscBTLookup(change_basis,i)) { 2526 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2527 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2528 cum++; 2529 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2530 for (k=0;k<constraints_n[i];k++) { 2531 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2532 nnz[j+k] = size_of_constraint; 2533 } 2534 j += constraints_n[i]; 2535 } 2536 } 2537 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2538 ierr = PetscFree(nnz);CHKERRQ(ierr); 2539 2540 /* set values in constraint matrix */ 2541 for (i=0;i<total_primal_vertices;i++) { 2542 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2543 } 2544 total_counts = total_primal_vertices; 2545 for (i=n_vertices;i<total_counts_cc;i++) { 2546 if (!PetscBTLookup(change_basis,i)) { 2547 PetscInt *cols; 2548 2549 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2550 cols = constraints_idxs+constraints_idxs_ptr[i]; 2551 for (k=0;k<constraints_n[i];k++) { 2552 PetscInt row = total_counts+k; 2553 PetscScalar *vals; 2554 2555 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2556 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2557 } 2558 total_counts += constraints_n[i]; 2559 } 2560 } 2561 /* assembling */ 2562 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2563 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2564 2565 /* 2566 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2567 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2568 */ 2569 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2570 if (pcbddc->use_change_of_basis) { 2571 /* dual and primal dofs on a single cc */ 2572 PetscInt dual_dofs,primal_dofs; 2573 /* working stuff for GEQRF */ 2574 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2575 PetscBLASInt lqr_work; 2576 /* working stuff for UNGQR */ 2577 PetscScalar *gqr_work,lgqr_work_t; 2578 PetscBLASInt lgqr_work; 2579 /* working stuff for TRTRS */ 2580 PetscScalar *trs_rhs; 2581 PetscBLASInt Blas_NRHS; 2582 /* pointers for values insertion into change of basis matrix */ 2583 PetscInt *start_rows,*start_cols; 2584 PetscScalar *start_vals; 2585 /* working stuff for values insertion */ 2586 PetscBT is_primal; 2587 PetscInt *aux_primal_numbering_B; 2588 /* matrix sizes */ 2589 PetscInt global_size,local_size; 2590 /* temporary change of basis */ 2591 Mat localChangeOfBasisMatrix; 2592 /* extra space for debugging */ 2593 PetscScalar *dbg_work; 2594 2595 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2596 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2597 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2598 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2599 /* nonzeros for local mat */ 2600 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2601 for (i=0;i<pcis->n;i++) nnz[i]=1; 2602 for (i=n_vertices;i<total_counts_cc;i++) { 2603 if (PetscBTLookup(change_basis,i)) { 2604 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2605 if (PetscBTLookup(qr_needed_idx,i)) { 2606 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2607 } else { 2608 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2609 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2610 } 2611 } 2612 } 2613 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2614 ierr = PetscFree(nnz);CHKERRQ(ierr); 2615 /* Set initial identity in the matrix */ 2616 for (i=0;i<pcis->n;i++) { 2617 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2618 } 2619 2620 if (pcbddc->dbg_flag) { 2621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2623 } 2624 2625 2626 /* Now we loop on the constraints which need a change of basis */ 2627 /* 2628 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2629 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2630 2631 Basic blocks of change of basis matrix T computed by 2632 2633 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2634 2635 | 1 0 ... 0 s_1/S | 2636 | 0 1 ... 0 s_2/S | 2637 | ... | 2638 | 0 ... 1 s_{n-1}/S | 2639 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2640 2641 with S = \sum_{i=1}^n s_i^2 2642 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2643 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2644 2645 - QR decomposition of constraints otherwise 2646 */ 2647 if (qr_needed) { 2648 /* space to store Q */ 2649 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2650 /* first we issue queries for optimal work */ 2651 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2652 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2653 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2654 lqr_work = -1; 2655 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2656 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2657 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2658 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2659 lgqr_work = -1; 2660 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2661 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2662 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2663 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2664 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2665 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2666 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2667 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2668 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2669 /* array to store scaling factors for reflectors */ 2670 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2671 /* array to store rhs and solution of triangular solver */ 2672 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2673 /* allocating workspace for check */ 2674 if (pcbddc->dbg_flag) { 2675 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2676 } 2677 } 2678 /* array to store whether a node is primal or not */ 2679 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2680 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2681 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2682 if (i != total_primal_vertices) { 2683 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2684 } 2685 for (i=0;i<total_primal_vertices;i++) { 2686 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2687 } 2688 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2689 2690 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2691 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2692 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2693 if (PetscBTLookup(change_basis,total_counts)) { 2694 /* get constraint info */ 2695 primal_dofs = constraints_n[total_counts]; 2696 dual_dofs = size_of_constraint-primal_dofs; 2697 2698 if (pcbddc->dbg_flag) { 2699 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2700 } 2701 2702 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2703 2704 /* copy quadrature constraints for change of basis check */ 2705 if (pcbddc->dbg_flag) { 2706 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2707 } 2708 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2709 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2710 2711 /* compute QR decomposition of constraints */ 2712 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2713 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2714 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2715 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2716 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2717 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2718 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2719 2720 /* explictly compute R^-T */ 2721 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2722 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2723 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2724 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2725 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2726 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2727 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2728 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2729 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2730 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2731 2732 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2733 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2734 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2735 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2736 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2737 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2738 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2739 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2740 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2741 2742 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2743 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2744 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2745 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2746 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2747 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2748 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2749 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2750 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2751 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2752 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2753 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2754 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2755 2756 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2757 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2758 /* insert cols for primal dofs */ 2759 for (j=0;j<primal_dofs;j++) { 2760 start_vals = &qr_basis[j*size_of_constraint]; 2761 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2762 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2763 } 2764 /* insert cols for dual dofs */ 2765 for (j=0,k=0;j<dual_dofs;k++) { 2766 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2767 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2768 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2769 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2770 j++; 2771 } 2772 } 2773 2774 /* check change of basis */ 2775 if (pcbddc->dbg_flag) { 2776 PetscInt ii,jj; 2777 PetscBool valid_qr=PETSC_TRUE; 2778 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2779 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2780 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2781 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2782 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2783 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2784 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2785 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2786 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2787 for (jj=0;jj<size_of_constraint;jj++) { 2788 for (ii=0;ii<primal_dofs;ii++) { 2789 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2790 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2791 } 2792 } 2793 if (!valid_qr) { 2794 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2795 for (jj=0;jj<size_of_constraint;jj++) { 2796 for (ii=0;ii<primal_dofs;ii++) { 2797 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2798 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2799 } 2800 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2801 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2802 } 2803 } 2804 } 2805 } else { 2806 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2807 } 2808 } 2809 } else { /* simple transformation block */ 2810 PetscInt row,col; 2811 PetscScalar val,norm; 2812 2813 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2814 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2815 for (j=0;j<size_of_constraint;j++) { 2816 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2817 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2818 if (!PetscBTLookup(is_primal,row_B)) { 2819 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2820 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2821 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2822 } else { 2823 for (k=0;k<size_of_constraint;k++) { 2824 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2825 if (row != col) { 2826 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2827 } else { 2828 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2829 } 2830 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2831 } 2832 } 2833 } 2834 if (pcbddc->dbg_flag) { 2835 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2836 } 2837 } 2838 } else { 2839 if (pcbddc->dbg_flag) { 2840 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2841 } 2842 } 2843 } 2844 2845 /* free workspace */ 2846 if (qr_needed) { 2847 if (pcbddc->dbg_flag) { 2848 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2849 } 2850 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2851 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2852 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2853 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2854 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2855 } 2856 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2857 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2858 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2859 2860 /* assembling of global change of variable */ 2861 { 2862 Mat tmat; 2863 PetscInt bs; 2864 2865 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2866 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2867 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2868 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2869 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2870 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2871 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2872 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2873 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2874 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2875 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2876 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2877 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2878 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2879 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2880 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2881 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2882 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2883 } 2884 /* check */ 2885 if (pcbddc->dbg_flag) { 2886 PetscReal error; 2887 Vec x,x_change; 2888 2889 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2890 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2891 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2892 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2893 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2894 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2895 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2896 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2897 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2898 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2899 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2900 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2901 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2902 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2903 ierr = VecDestroy(&x);CHKERRQ(ierr); 2904 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2905 } 2906 2907 /* adapt sub_schurs computed (if any) */ 2908 if (pcbddc->use_deluxe_scaling) { 2909 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2910 if (sub_schurs->S_Ej_all) { 2911 Mat S_new,tmat; 2912 IS is_all_N; 2913 2914 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2915 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2916 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2917 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2918 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2919 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2920 sub_schurs->S_Ej_all = S_new; 2921 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2922 if (sub_schurs->sum_S_Ej_all) { 2923 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2924 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2925 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2926 sub_schurs->sum_S_Ej_all = S_new; 2927 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2928 } 2929 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2930 } 2931 } 2932 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2933 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2934 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2935 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2936 } 2937 2938 /* set up change of basis context */ 2939 if (pcbddc->ChangeOfBasisMatrix) { 2940 PCBDDCChange_ctx change_ctx; 2941 2942 if (!pcbddc->new_global_mat) { 2943 PetscInt global_size,local_size; 2944 2945 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2946 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2947 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2948 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2949 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2950 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2951 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2952 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2953 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2954 } else { 2955 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2956 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2957 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2958 } 2959 if (!pcbddc->user_ChangeOfBasisMatrix) { 2960 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2961 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2962 } else { 2963 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2964 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2965 } 2966 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2967 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2968 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2969 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2970 } 2971 2972 /* check if a new primal space has been introduced */ 2973 pcbddc->new_primal_space_local = PETSC_TRUE; 2974 if (olocal_primal_size == pcbddc->local_primal_size) { 2975 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2976 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2977 if (!pcbddc->new_primal_space_local) { 2978 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2979 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2980 } 2981 } 2982 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2983 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2984 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2985 2986 /* flush dbg viewer */ 2987 if (pcbddc->dbg_flag) { 2988 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2989 } 2990 2991 /* free workspace */ 2992 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2993 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2994 if (!pcbddc->adaptive_selection) { 2995 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2996 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2997 } else { 2998 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2999 pcbddc->adaptive_constraints_idxs_ptr, 3000 pcbddc->adaptive_constraints_data_ptr, 3001 pcbddc->adaptive_constraints_idxs, 3002 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3003 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3004 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3005 } 3006 PetscFunctionReturn(0); 3007 } 3008 3009 #undef __FUNCT__ 3010 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3011 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3012 { 3013 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3014 PC_IS *pcis = (PC_IS*)pc->data; 3015 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3016 PetscInt ierr,i,vertex_size,N; 3017 PetscViewer viewer=pcbddc->dbg_viewer; 3018 3019 PetscFunctionBegin; 3020 /* Reset previously computed graph */ 3021 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3022 /* Init local Graph struct */ 3023 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3024 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3025 3026 /* Check validity of the csr graph passed in by the user */ 3027 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3028 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3029 } 3030 3031 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3032 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3033 PetscInt *xadj,*adjncy; 3034 PetscInt nvtxs; 3035 PetscBool flg_row=PETSC_FALSE; 3036 3037 if (pcbddc->use_local_adj) { 3038 3039 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3040 if (flg_row) { 3041 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3042 pcbddc->computed_rowadj = PETSC_TRUE; 3043 } 3044 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3045 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3046 IS is_dummy; 3047 ISLocalToGlobalMapping l2gmap_dummy; 3048 PetscInt j,sum; 3049 PetscInt *cxadj,*cadjncy; 3050 const PetscInt *idxs; 3051 PCBDDCGraph graph; 3052 PetscBT is_on_boundary; 3053 3054 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3055 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3056 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3057 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3058 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3059 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3060 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3061 if (flg_row) { 3062 graph->xadj = xadj; 3063 graph->adjncy = adjncy; 3064 } 3065 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3066 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3067 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3068 3069 if (pcbddc->dbg_flag) { 3070 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3071 for (i=0;i<graph->ncc;i++) { 3072 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3073 } 3074 } 3075 3076 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3077 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3078 for (i=0;i<pcis->n_B;i++) { 3079 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3080 } 3081 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3082 3083 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3084 sum = 0; 3085 for (i=0;i<graph->ncc;i++) { 3086 PetscInt sizecc = 0; 3087 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3088 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3089 sizecc++; 3090 } 3091 } 3092 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3093 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3094 cxadj[graph->queue[j]] = sizecc; 3095 } 3096 } 3097 sum += sizecc*sizecc; 3098 } 3099 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3100 sum = 0; 3101 for (i=0;i<pcis->n;i++) { 3102 PetscInt temp = cxadj[i]; 3103 cxadj[i] = sum; 3104 sum += temp; 3105 } 3106 cxadj[pcis->n] = sum; 3107 for (i=0;i<graph->ncc;i++) { 3108 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3109 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3110 PetscInt k,sizecc = 0; 3111 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3112 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3113 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3114 sizecc++; 3115 } 3116 } 3117 } 3118 } 3119 } 3120 if (sum) { 3121 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3122 } else { 3123 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3124 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3125 } 3126 graph->xadj = 0; 3127 graph->adjncy = 0; 3128 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3129 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3130 } 3131 } 3132 if (pcbddc->dbg_flag) { 3133 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3134 } 3135 3136 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3137 vertex_size = 1; 3138 if (pcbddc->user_provided_isfordofs) { 3139 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3140 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3141 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3142 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3143 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3144 } 3145 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3146 pcbddc->n_ISForDofs = 0; 3147 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3148 } 3149 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3150 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3151 } else { 3152 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3153 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3154 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3155 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3156 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3157 } 3158 } 3159 } 3160 3161 /* Setup of Graph */ 3162 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3163 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3164 } 3165 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3166 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3167 } 3168 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3169 3170 /* Graph's connected components analysis */ 3171 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3172 3173 /* print some info to stdout */ 3174 if (pcbddc->dbg_flag) { 3175 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3176 } 3177 3178 /* mark topography has done */ 3179 pcbddc->recompute_topography = PETSC_FALSE; 3180 PetscFunctionReturn(0); 3181 } 3182 3183 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3184 #undef __FUNCT__ 3185 #define __FUNCT__ "PCBDDCSubsetNumbering" 3186 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3187 { 3188 PetscSF sf; 3189 PetscLayout map; 3190 const PetscInt *idxs; 3191 PetscInt *leaf_data,*root_data,*gidxs; 3192 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3193 PetscInt n_n,nlocals,start,first_index; 3194 PetscMPIInt commsize; 3195 PetscBool first_found; 3196 PetscErrorCode ierr; 3197 3198 PetscFunctionBegin; 3199 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3200 if (subset_mult) { 3201 PetscCheckSameComm(subset,1,subset_mult,2); 3202 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3203 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3204 } 3205 /* create workspace layout for computing global indices of subset */ 3206 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3207 lbounds[0] = lbounds[1] = 0; 3208 for (i=0;i<n;i++) { 3209 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3210 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3211 } 3212 lbounds[0] = -lbounds[0]; 3213 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3214 gbounds[0] = -gbounds[0]; 3215 N = gbounds[1] - gbounds[0] + 1; 3216 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3217 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3218 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3219 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3220 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3221 3222 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3223 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3224 if (subset_mult) { 3225 const PetscInt* idxs_mult; 3226 3227 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3228 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3229 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3230 } else { 3231 for (i=0;i<n;i++) leaf_data[i] = 1; 3232 } 3233 /* local size of new subset */ 3234 n_n = 0; 3235 for (i=0;i<n;i++) n_n += leaf_data[i]; 3236 3237 /* global indexes in layout */ 3238 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3239 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3240 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3241 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3242 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3243 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3244 3245 /* reduce from leaves to roots */ 3246 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3247 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3248 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3249 3250 /* count indexes in local part of layout */ 3251 nlocals = 0; 3252 first_index = -1; 3253 first_found = PETSC_FALSE; 3254 for (i=0;i<Nl;i++) { 3255 if (!first_found && root_data[i]) { 3256 first_found = PETSC_TRUE; 3257 first_index = i; 3258 } 3259 nlocals += root_data[i]; 3260 } 3261 3262 /* cumulative of number of indexes and size of subset without holes */ 3263 #if defined(PETSC_HAVE_MPI_EXSCAN) 3264 start = 0; 3265 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3266 #else 3267 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3268 start = start-nlocals; 3269 #endif 3270 3271 if (N_n) { /* compute total size of new subset if requested */ 3272 *N_n = start + nlocals; 3273 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3274 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3275 } 3276 3277 /* adapt root data with cumulative */ 3278 if (first_found) { 3279 PetscInt old_index; 3280 3281 root_data[first_index] += start; 3282 old_index = first_index; 3283 for (i=first_index+1;i<Nl;i++) { 3284 if (root_data[i]) { 3285 root_data[i] += root_data[old_index]; 3286 old_index = i; 3287 } 3288 } 3289 } 3290 3291 /* from roots to leaves */ 3292 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3293 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3294 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3295 3296 /* create new IS with global indexes without holes */ 3297 if (subset_mult) { 3298 const PetscInt* idxs_mult; 3299 PetscInt cum; 3300 3301 cum = 0; 3302 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3303 for (i=0;i<n;i++) { 3304 PetscInt j; 3305 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3306 } 3307 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3308 } else { 3309 for (i=0;i<n;i++) { 3310 gidxs[i] = leaf_data[i]-1; 3311 } 3312 } 3313 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3314 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3315 PetscFunctionReturn(0); 3316 } 3317 3318 #undef __FUNCT__ 3319 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3320 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3321 { 3322 PetscInt i,j; 3323 PetscScalar *alphas; 3324 PetscErrorCode ierr; 3325 3326 PetscFunctionBegin; 3327 /* this implements stabilized Gram-Schmidt */ 3328 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3329 for (i=0;i<n;i++) { 3330 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3331 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3332 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3333 } 3334 ierr = PetscFree(alphas);CHKERRQ(ierr); 3335 PetscFunctionReturn(0); 3336 } 3337 3338 #undef __FUNCT__ 3339 #define __FUNCT__ "MatISGetSubassemblingPattern" 3340 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3341 { 3342 IS ranks_send_to; 3343 PetscInt n_neighs,*neighs,*n_shared,**shared; 3344 PetscMPIInt size,rank,color; 3345 PetscInt *xadj,*adjncy; 3346 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3347 PetscInt i,local_size,threshold=0; 3348 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3349 PetscSubcomm subcomm; 3350 PetscErrorCode ierr; 3351 3352 PetscFunctionBegin; 3353 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3354 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3355 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3356 3357 /* Get info on mapping */ 3358 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3359 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3360 3361 /* build local CSR graph of subdomains' connectivity */ 3362 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3363 xadj[0] = 0; 3364 xadj[1] = PetscMax(n_neighs-1,0); 3365 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3366 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3367 3368 if (threshold) { 3369 PetscInt xadj_count = 0; 3370 for (i=1;i<n_neighs;i++) { 3371 if (n_shared[i] > threshold) { 3372 adjncy[xadj_count] = neighs[i]; 3373 adjncy_wgt[xadj_count] = n_shared[i]; 3374 xadj_count++; 3375 } 3376 } 3377 xadj[1] = xadj_count; 3378 } else { 3379 if (xadj[1]) { 3380 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3381 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3382 } 3383 } 3384 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3385 if (use_square) { 3386 for (i=0;i<xadj[1];i++) { 3387 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3388 } 3389 } 3390 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3391 3392 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3393 3394 /* 3395 Restrict work on active processes only. 3396 */ 3397 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3398 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3399 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3400 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3401 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3402 if (color) { 3403 ierr = PetscFree(xadj);CHKERRQ(ierr); 3404 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3405 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3406 } else { 3407 Mat subdomain_adj; 3408 IS new_ranks,new_ranks_contig; 3409 MatPartitioning partitioner; 3410 PetscInt prank,rstart=0,rend=0; 3411 PetscInt *is_indices,*oldranks; 3412 PetscBool aggregate; 3413 3414 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3415 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3416 prank = rank; 3417 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3418 /* 3419 for (i=0;i<size;i++) { 3420 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3421 } 3422 */ 3423 for (i=0;i<xadj[1];i++) { 3424 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3425 } 3426 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3427 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3428 if (aggregate) { 3429 PetscInt lrows,row,ncols,*cols; 3430 PetscMPIInt nrank; 3431 PetscScalar *vals; 3432 3433 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3434 lrows = 0; 3435 if (nrank<redprocs) { 3436 lrows = size/redprocs; 3437 if (nrank<size%redprocs) lrows++; 3438 } 3439 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3440 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3441 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3442 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3443 row = nrank; 3444 ncols = xadj[1]-xadj[0]; 3445 cols = adjncy; 3446 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3447 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3448 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3449 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3450 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3451 ierr = PetscFree(xadj);CHKERRQ(ierr); 3452 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3453 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3454 ierr = PetscFree(vals);CHKERRQ(ierr); 3455 } else { 3456 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3457 } 3458 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3459 3460 /* Partition */ 3461 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3462 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3463 if (use_vwgt) { 3464 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3465 v_wgt[0] = local_size; 3466 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3467 } 3468 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3469 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3470 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3471 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3472 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3473 3474 /* renumber new_ranks to avoid "holes" in new set of processors */ 3475 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3476 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3477 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3478 if (!redprocs) { 3479 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3480 } else { 3481 PetscInt idxs[1]; 3482 PetscMPIInt tag; 3483 MPI_Request *reqs; 3484 3485 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3486 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3487 for (i=rstart;i<rend;i++) { 3488 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3489 } 3490 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3491 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3492 ierr = PetscFree(reqs);CHKERRQ(ierr); 3493 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3494 } 3495 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3496 /* clean up */ 3497 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3498 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3499 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3500 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3501 } 3502 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3503 3504 /* assemble parallel IS for sends */ 3505 i = 1; 3506 if (color) i=0; 3507 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3508 /* get back IS */ 3509 *is_sends = ranks_send_to; 3510 PetscFunctionReturn(0); 3511 } 3512 3513 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3514 3515 #undef __FUNCT__ 3516 #define __FUNCT__ "MatISSubassemble" 3517 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3518 { 3519 Mat local_mat; 3520 IS is_sends_internal; 3521 PetscInt rows,cols,new_local_rows; 3522 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3523 PetscBool ismatis,isdense,newisdense,destroy_mat; 3524 ISLocalToGlobalMapping l2gmap; 3525 PetscInt* l2gmap_indices; 3526 const PetscInt* is_indices; 3527 MatType new_local_type; 3528 /* buffers */ 3529 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3530 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3531 PetscInt *recv_buffer_idxs_local; 3532 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3533 /* MPI */ 3534 MPI_Comm comm,comm_n; 3535 PetscSubcomm subcomm; 3536 PetscMPIInt n_sends,n_recvs,commsize; 3537 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3538 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3539 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3540 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3541 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3542 PetscErrorCode ierr; 3543 3544 PetscFunctionBegin; 3545 /* TODO: add missing checks */ 3546 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3547 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3548 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3549 PetscValidLogicalCollectiveInt(mat,nis,7); 3550 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3551 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3552 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3553 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3554 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3555 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3556 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3557 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3558 PetscInt mrows,mcols,mnrows,mncols; 3559 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3560 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3561 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3562 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3563 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3564 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3565 } 3566 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3567 PetscValidLogicalCollectiveInt(mat,bs,0); 3568 /* prepare IS for sending if not provided */ 3569 if (!is_sends) { 3570 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3571 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3572 } else { 3573 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3574 is_sends_internal = is_sends; 3575 } 3576 3577 /* get comm */ 3578 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3579 3580 /* compute number of sends */ 3581 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3582 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3583 3584 /* compute number of receives */ 3585 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3586 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3587 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3588 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3589 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3590 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3591 ierr = PetscFree(iflags);CHKERRQ(ierr); 3592 3593 /* restrict comm if requested */ 3594 subcomm = 0; 3595 destroy_mat = PETSC_FALSE; 3596 if (restrict_comm) { 3597 PetscMPIInt color,subcommsize; 3598 3599 color = 0; 3600 if (restrict_full) { 3601 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3602 } else { 3603 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3604 } 3605 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3606 subcommsize = commsize - subcommsize; 3607 /* check if reuse has been requested */ 3608 if (reuse == MAT_REUSE_MATRIX) { 3609 if (*mat_n) { 3610 PetscMPIInt subcommsize2; 3611 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3612 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3613 comm_n = PetscObjectComm((PetscObject)*mat_n); 3614 } else { 3615 comm_n = PETSC_COMM_SELF; 3616 } 3617 } else { /* MAT_INITIAL_MATRIX */ 3618 PetscMPIInt rank; 3619 3620 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3621 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3622 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3623 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3624 comm_n = PetscSubcommChild(subcomm); 3625 } 3626 /* flag to destroy *mat_n if not significative */ 3627 if (color) destroy_mat = PETSC_TRUE; 3628 } else { 3629 comm_n = comm; 3630 } 3631 3632 /* prepare send/receive buffers */ 3633 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3634 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3635 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3636 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3637 if (nis) { 3638 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3639 } 3640 3641 /* Get data from local matrices */ 3642 if (!isdense) { 3643 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3644 /* TODO: See below some guidelines on how to prepare the local buffers */ 3645 /* 3646 send_buffer_vals should contain the raw values of the local matrix 3647 send_buffer_idxs should contain: 3648 - MatType_PRIVATE type 3649 - PetscInt size_of_l2gmap 3650 - PetscInt global_row_indices[size_of_l2gmap] 3651 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3652 */ 3653 } else { 3654 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3655 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3656 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3657 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3658 send_buffer_idxs[1] = i; 3659 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3660 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3661 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3662 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3663 for (i=0;i<n_sends;i++) { 3664 ilengths_vals[is_indices[i]] = len*len; 3665 ilengths_idxs[is_indices[i]] = len+2; 3666 } 3667 } 3668 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3669 /* additional is (if any) */ 3670 if (nis) { 3671 PetscMPIInt psum; 3672 PetscInt j; 3673 for (j=0,psum=0;j<nis;j++) { 3674 PetscInt plen; 3675 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3676 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3677 psum += len+1; /* indices + lenght */ 3678 } 3679 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3680 for (j=0,psum=0;j<nis;j++) { 3681 PetscInt plen; 3682 const PetscInt *is_array_idxs; 3683 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3684 send_buffer_idxs_is[psum] = plen; 3685 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3686 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3687 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3688 psum += plen+1; /* indices + lenght */ 3689 } 3690 for (i=0;i<n_sends;i++) { 3691 ilengths_idxs_is[is_indices[i]] = psum; 3692 } 3693 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3694 } 3695 3696 buf_size_idxs = 0; 3697 buf_size_vals = 0; 3698 buf_size_idxs_is = 0; 3699 for (i=0;i<n_recvs;i++) { 3700 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3701 buf_size_vals += (PetscInt)olengths_vals[i]; 3702 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3703 } 3704 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3705 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3706 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3707 3708 /* get new tags for clean communications */ 3709 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3710 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3711 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3712 3713 /* allocate for requests */ 3714 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3715 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3716 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3717 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3718 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3719 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3720 3721 /* communications */ 3722 ptr_idxs = recv_buffer_idxs; 3723 ptr_vals = recv_buffer_vals; 3724 ptr_idxs_is = recv_buffer_idxs_is; 3725 for (i=0;i<n_recvs;i++) { 3726 source_dest = onodes[i]; 3727 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3728 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3729 ptr_idxs += olengths_idxs[i]; 3730 ptr_vals += olengths_vals[i]; 3731 if (nis) { 3732 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3733 ptr_idxs_is += olengths_idxs_is[i]; 3734 } 3735 } 3736 for (i=0;i<n_sends;i++) { 3737 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3738 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3739 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3740 if (nis) { 3741 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3742 } 3743 } 3744 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3745 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3746 3747 /* assemble new l2g map */ 3748 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3749 ptr_idxs = recv_buffer_idxs; 3750 new_local_rows = 0; 3751 for (i=0;i<n_recvs;i++) { 3752 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3753 ptr_idxs += olengths_idxs[i]; 3754 } 3755 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3756 ptr_idxs = recv_buffer_idxs; 3757 new_local_rows = 0; 3758 for (i=0;i<n_recvs;i++) { 3759 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3760 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3761 ptr_idxs += olengths_idxs[i]; 3762 } 3763 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3764 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3765 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3766 3767 /* infer new local matrix type from received local matrices type */ 3768 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3769 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3770 if (n_recvs) { 3771 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3772 ptr_idxs = recv_buffer_idxs; 3773 for (i=0;i<n_recvs;i++) { 3774 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3775 new_local_type_private = MATAIJ_PRIVATE; 3776 break; 3777 } 3778 ptr_idxs += olengths_idxs[i]; 3779 } 3780 switch (new_local_type_private) { 3781 case MATDENSE_PRIVATE: 3782 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3783 new_local_type = MATSEQAIJ; 3784 bs = 1; 3785 } else { /* if I receive only 1 dense matrix */ 3786 new_local_type = MATSEQDENSE; 3787 bs = 1; 3788 } 3789 break; 3790 case MATAIJ_PRIVATE: 3791 new_local_type = MATSEQAIJ; 3792 bs = 1; 3793 break; 3794 case MATBAIJ_PRIVATE: 3795 new_local_type = MATSEQBAIJ; 3796 break; 3797 case MATSBAIJ_PRIVATE: 3798 new_local_type = MATSEQSBAIJ; 3799 break; 3800 default: 3801 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3802 break; 3803 } 3804 } else { /* by default, new_local_type is seqdense */ 3805 new_local_type = MATSEQDENSE; 3806 bs = 1; 3807 } 3808 3809 /* create MATIS object if needed */ 3810 if (reuse == MAT_INITIAL_MATRIX) { 3811 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3812 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 3813 } else { 3814 /* it also destroys the local matrices */ 3815 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3816 } 3817 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3818 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3819 3820 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3821 3822 /* Global to local map of received indices */ 3823 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3824 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3825 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3826 3827 /* restore attributes -> type of incoming data and its size */ 3828 buf_size_idxs = 0; 3829 for (i=0;i<n_recvs;i++) { 3830 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3831 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3832 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3833 } 3834 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3835 3836 /* set preallocation */ 3837 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3838 if (!newisdense) { 3839 PetscInt *new_local_nnz=0; 3840 3841 ptr_vals = recv_buffer_vals; 3842 ptr_idxs = recv_buffer_idxs_local; 3843 if (n_recvs) { 3844 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3845 } 3846 for (i=0;i<n_recvs;i++) { 3847 PetscInt j; 3848 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3849 for (j=0;j<*(ptr_idxs+1);j++) { 3850 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3851 } 3852 } else { 3853 /* TODO */ 3854 } 3855 ptr_idxs += olengths_idxs[i]; 3856 } 3857 if (new_local_nnz) { 3858 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3859 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3860 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3861 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3862 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3863 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3864 } else { 3865 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3866 } 3867 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3868 } else { 3869 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3870 } 3871 3872 /* set values */ 3873 ptr_vals = recv_buffer_vals; 3874 ptr_idxs = recv_buffer_idxs_local; 3875 for (i=0;i<n_recvs;i++) { 3876 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3877 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3878 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3879 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3880 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3881 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3882 } else { 3883 /* TODO */ 3884 } 3885 ptr_idxs += olengths_idxs[i]; 3886 ptr_vals += olengths_vals[i]; 3887 } 3888 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3889 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3890 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3891 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3892 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3893 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3894 3895 #if 0 3896 if (!restrict_comm) { /* check */ 3897 Vec lvec,rvec; 3898 PetscReal infty_error; 3899 3900 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3901 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3902 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3903 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3904 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3905 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3906 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3907 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3908 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3909 } 3910 #endif 3911 3912 /* assemble new additional is (if any) */ 3913 if (nis) { 3914 PetscInt **temp_idxs,*count_is,j,psum; 3915 3916 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3917 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3918 ptr_idxs = recv_buffer_idxs_is; 3919 psum = 0; 3920 for (i=0;i<n_recvs;i++) { 3921 for (j=0;j<nis;j++) { 3922 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3923 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3924 psum += plen; 3925 ptr_idxs += plen+1; /* shift pointer to received data */ 3926 } 3927 } 3928 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3929 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3930 for (i=1;i<nis;i++) { 3931 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3932 } 3933 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3934 ptr_idxs = recv_buffer_idxs_is; 3935 for (i=0;i<n_recvs;i++) { 3936 for (j=0;j<nis;j++) { 3937 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3938 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3939 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3940 ptr_idxs += plen+1; /* shift pointer to received data */ 3941 } 3942 } 3943 for (i=0;i<nis;i++) { 3944 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3945 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3946 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3947 } 3948 ierr = PetscFree(count_is);CHKERRQ(ierr); 3949 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3950 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3951 } 3952 /* free workspace */ 3953 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3954 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3955 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3956 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3957 if (isdense) { 3958 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3959 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3960 } else { 3961 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3962 } 3963 if (nis) { 3964 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3965 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3966 } 3967 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3968 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3969 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3970 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3971 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3972 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3973 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3974 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3975 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3976 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3977 ierr = PetscFree(onodes);CHKERRQ(ierr); 3978 if (nis) { 3979 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3980 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3981 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3982 } 3983 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3984 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3985 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3986 for (i=0;i<nis;i++) { 3987 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3988 } 3989 *mat_n = NULL; 3990 } 3991 PetscFunctionReturn(0); 3992 } 3993 3994 /* temporary hack into ksp private data structure */ 3995 #include <petsc/private/kspimpl.h> 3996 3997 #undef __FUNCT__ 3998 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3999 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4000 { 4001 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4002 PC_IS *pcis = (PC_IS*)pc->data; 4003 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4004 MatNullSpace CoarseNullSpace=NULL; 4005 ISLocalToGlobalMapping coarse_islg; 4006 IS coarse_is,*isarray; 4007 PetscInt i,im_active=-1,active_procs=-1; 4008 PetscInt nis,nisdofs,nisneu; 4009 PC pc_temp; 4010 PCType coarse_pc_type; 4011 KSPType coarse_ksp_type; 4012 PetscBool multilevel_requested,multilevel_allowed; 4013 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4014 Mat t_coarse_mat_is; 4015 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4016 PetscMPIInt all_procs; 4017 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4018 PetscBool compute_vecs = PETSC_FALSE; 4019 PetscScalar *array; 4020 PetscErrorCode ierr; 4021 4022 PetscFunctionBegin; 4023 /* Assign global numbering to coarse dofs */ 4024 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4025 PetscInt ocoarse_size; 4026 compute_vecs = PETSC_TRUE; 4027 ocoarse_size = pcbddc->coarse_size; 4028 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4029 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4030 /* see if we can avoid some work */ 4031 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4032 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4033 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4034 PC pc; 4035 PetscBool isbddc; 4036 4037 /* temporary workaround since PCBDDC does not have a reset method so far */ 4038 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4039 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4040 if (isbddc) { 4041 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4042 } 4043 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4044 coarse_reuse = PETSC_FALSE; 4045 } else { /* we can safely reuse already computed coarse matrix */ 4046 coarse_reuse = PETSC_TRUE; 4047 } 4048 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4049 coarse_reuse = PETSC_FALSE; 4050 } 4051 /* reset any subassembling information */ 4052 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4053 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4054 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4055 coarse_reuse = PETSC_TRUE; 4056 } 4057 4058 /* count "active" (i.e. with positive local size) and "void" processes */ 4059 im_active = !!(pcis->n); 4060 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4061 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4062 void_procs = all_procs-active_procs; 4063 csin_type_simple = PETSC_TRUE; 4064 redist = PETSC_FALSE; 4065 if (pcbddc->current_level && void_procs) { 4066 csin_ml = PETSC_TRUE; 4067 ncoarse_ml = void_procs; 4068 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4069 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4070 csin_ds = PETSC_TRUE; 4071 ncoarse_ds = pcbddc->redistribute_coarse; 4072 redist = PETSC_TRUE; 4073 } else { 4074 csin_ds = PETSC_TRUE; 4075 ncoarse_ds = active_procs; 4076 redist = PETSC_TRUE; 4077 } 4078 } else { 4079 csin_ml = PETSC_FALSE; 4080 ncoarse_ml = all_procs; 4081 if (void_procs) { 4082 csin_ds = PETSC_TRUE; 4083 ncoarse_ds = void_procs; 4084 csin_type_simple = PETSC_FALSE; 4085 } else { 4086 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4087 csin_ds = PETSC_TRUE; 4088 ncoarse_ds = pcbddc->redistribute_coarse; 4089 redist = PETSC_TRUE; 4090 } else { 4091 csin_ds = PETSC_FALSE; 4092 ncoarse_ds = all_procs; 4093 } 4094 } 4095 } 4096 4097 /* 4098 test if we can go multilevel: three conditions must be satisfied: 4099 - we have not exceeded the number of levels requested 4100 - we can actually subassemble the active processes 4101 - we can find a suitable number of MPI processes where we can place the subassembled problem 4102 */ 4103 multilevel_allowed = PETSC_FALSE; 4104 multilevel_requested = PETSC_FALSE; 4105 if (pcbddc->current_level < pcbddc->max_levels) { 4106 multilevel_requested = PETSC_TRUE; 4107 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4108 multilevel_allowed = PETSC_FALSE; 4109 } else { 4110 multilevel_allowed = PETSC_TRUE; 4111 } 4112 } 4113 /* determine number of process partecipating to coarse solver */ 4114 if (multilevel_allowed) { 4115 ncoarse = ncoarse_ml; 4116 csin = csin_ml; 4117 redist = PETSC_FALSE; 4118 } else { 4119 ncoarse = ncoarse_ds; 4120 csin = csin_ds; 4121 } 4122 4123 /* creates temporary l2gmap and IS for coarse indexes */ 4124 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4125 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4126 4127 /* creates temporary MATIS object for coarse matrix */ 4128 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4129 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4130 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4131 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4132 #if 0 4133 { 4134 PetscViewer viewer; 4135 char filename[256]; 4136 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4138 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4139 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4140 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4141 } 4142 #endif 4143 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4144 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4145 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4146 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4147 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4148 4149 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4150 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4151 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4152 const PetscInt *idxs; 4153 ISLocalToGlobalMapping tmap; 4154 4155 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4156 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4157 /* allocate space for temporary storage */ 4158 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4159 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4160 /* allocate for IS array */ 4161 nisdofs = pcbddc->n_ISForDofsLocal; 4162 nisneu = !!pcbddc->NeumannBoundariesLocal; 4163 nis = nisdofs + nisneu; 4164 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4165 /* dofs splitting */ 4166 for (i=0;i<nisdofs;i++) { 4167 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4168 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4169 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4170 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4171 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4172 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4173 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4174 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4175 } 4176 /* neumann boundaries */ 4177 if (pcbddc->NeumannBoundariesLocal) { 4178 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4179 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4180 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4181 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4182 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4183 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4184 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4185 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4186 } 4187 /* free memory */ 4188 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4189 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4190 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4191 } else { 4192 nis = 0; 4193 nisdofs = 0; 4194 nisneu = 0; 4195 isarray = NULL; 4196 } 4197 /* destroy no longer needed map */ 4198 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4199 4200 /* restrict on coarse candidates (if needed) */ 4201 coarse_mat_is = NULL; 4202 if (csin) { 4203 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4204 if (redist) { 4205 PetscMPIInt rank; 4206 PetscInt spc,n_spc_p1,dest[1],destsize; 4207 4208 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4209 spc = active_procs/ncoarse; 4210 n_spc_p1 = active_procs%ncoarse; 4211 if (im_active) { 4212 destsize = 1; 4213 if (rank > n_spc_p1*(spc+1)-1) { 4214 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4215 } else { 4216 dest[0] = rank/(spc+1); 4217 } 4218 } else { 4219 destsize = 0; 4220 } 4221 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4222 } else if (csin_type_simple) { 4223 PetscMPIInt rank; 4224 PetscInt issize,isidx; 4225 4226 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4227 if (im_active) { 4228 issize = 1; 4229 isidx = (PetscInt)rank; 4230 } else { 4231 issize = 0; 4232 isidx = -1; 4233 } 4234 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4235 } else { /* get a suitable subassembling pattern from MATIS code */ 4236 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4237 } 4238 4239 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4240 if (!redist || ncoarse <= void_procs) { 4241 PetscInt ncoarse_cand,tissize,*nisindices; 4242 PetscInt *coarse_candidates; 4243 const PetscInt* tisindices; 4244 4245 /* get coarse candidates' ranks in pc communicator */ 4246 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4247 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4248 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4249 if (!coarse_candidates[i]) { 4250 coarse_candidates[ncoarse_cand++]=i; 4251 } 4252 } 4253 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4254 4255 4256 if (pcbddc->dbg_flag) { 4257 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4258 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4259 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4260 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4261 for (i=0;i<ncoarse_cand;i++) { 4262 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4263 } 4264 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4265 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4266 } 4267 /* shift the pattern on coarse candidates */ 4268 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4269 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4270 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4271 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4272 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4273 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4274 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4275 } 4276 if (pcbddc->dbg_flag) { 4277 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4278 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4279 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4280 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4281 } 4282 } 4283 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4284 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4285 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4286 } else { /* this is the last level, so use just receiving processes in subcomm */ 4287 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4288 } 4289 } else { 4290 if (pcbddc->dbg_flag) { 4291 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4292 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4293 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4294 } 4295 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4296 coarse_mat_is = t_coarse_mat_is; 4297 } 4298 4299 /* create local to global scatters for coarse problem */ 4300 if (compute_vecs) { 4301 PetscInt lrows; 4302 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4303 if (coarse_mat_is) { 4304 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4305 } else { 4306 lrows = 0; 4307 } 4308 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4309 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4310 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4311 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4312 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4313 } 4314 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4315 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4316 4317 /* set defaults for coarse KSP and PC */ 4318 if (multilevel_allowed) { 4319 coarse_ksp_type = KSPRICHARDSON; 4320 coarse_pc_type = PCBDDC; 4321 } else { 4322 coarse_ksp_type = KSPPREONLY; 4323 coarse_pc_type = PCREDUNDANT; 4324 } 4325 4326 /* print some info if requested */ 4327 if (pcbddc->dbg_flag) { 4328 if (!multilevel_allowed) { 4329 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4330 if (multilevel_requested) { 4331 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4332 } else if (pcbddc->max_levels) { 4333 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4334 } 4335 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4336 } 4337 } 4338 4339 /* create the coarse KSP object only once with defaults */ 4340 if (coarse_mat_is) { 4341 MatReuse coarse_mat_reuse; 4342 PetscViewer dbg_viewer = NULL; 4343 if (pcbddc->dbg_flag) { 4344 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4345 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4346 } 4347 if (!pcbddc->coarse_ksp) { 4348 char prefix[256],str_level[16]; 4349 size_t len; 4350 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4351 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4352 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4353 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4354 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4355 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4356 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4357 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4358 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4359 /* prefix */ 4360 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4361 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4362 if (!pcbddc->current_level) { 4363 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4364 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4365 } else { 4366 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4367 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4368 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4369 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4370 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4371 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4372 } 4373 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4374 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4375 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4376 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4377 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4378 /* allow user customization */ 4379 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4380 } 4381 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4382 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4383 if (nisdofs) { 4384 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4385 for (i=0;i<nisdofs;i++) { 4386 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4387 } 4388 } 4389 if (nisneu) { 4390 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4391 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4392 } 4393 4394 /* get some info after set from options */ 4395 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4396 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4397 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4398 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4399 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4400 isbddc = PETSC_FALSE; 4401 } 4402 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4403 if (isredundant) { 4404 KSP inner_ksp; 4405 PC inner_pc; 4406 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4407 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4408 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4409 } 4410 4411 /* assemble coarse matrix */ 4412 if (coarse_reuse) { 4413 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4414 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4415 coarse_mat_reuse = MAT_REUSE_MATRIX; 4416 } else { 4417 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4418 } 4419 if (isbddc || isnn) { 4420 if (pcbddc->coarsening_ratio > 1) { 4421 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4422 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4423 if (pcbddc->dbg_flag) { 4424 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4425 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4426 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4427 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4428 } 4429 } 4430 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4431 } else { 4432 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4433 coarse_mat = coarse_mat_is; 4434 } 4435 } else { 4436 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4437 } 4438 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4439 4440 /* propagate symmetry info of coarse matrix */ 4441 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4442 if (pc->pmat->symmetric_set) { 4443 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4444 } 4445 if (pc->pmat->hermitian_set) { 4446 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4447 } 4448 if (pc->pmat->spd_set) { 4449 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4450 } 4451 /* set operators */ 4452 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4453 if (pcbddc->dbg_flag) { 4454 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4455 } 4456 } else { /* processes non partecipating to coarse solver (if any) */ 4457 coarse_mat = 0; 4458 } 4459 ierr = PetscFree(isarray);CHKERRQ(ierr); 4460 #if 0 4461 { 4462 PetscViewer viewer; 4463 char filename[256]; 4464 sprintf(filename,"coarse_mat.m"); 4465 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4466 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4467 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4468 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4469 } 4470 #endif 4471 4472 /* Compute coarse null space (special handling by BDDC only) */ 4473 #if 0 4474 if (pcbddc->NullSpace) { 4475 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4476 } 4477 #endif 4478 4479 if (pcbddc->coarse_ksp) { 4480 Vec crhs,csol; 4481 PetscBool ispreonly; 4482 4483 if (CoarseNullSpace) { 4484 if (isbddc) { 4485 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4486 } else { 4487 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4488 } 4489 } 4490 /* setup coarse ksp */ 4491 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4492 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4493 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4494 /* hack */ 4495 if (!csol) { 4496 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4497 } 4498 if (!crhs) { 4499 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4500 } 4501 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4502 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4503 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4504 KSP check_ksp; 4505 KSPType check_ksp_type; 4506 PC check_pc; 4507 Vec check_vec,coarse_vec; 4508 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4509 PetscInt its; 4510 PetscBool compute_eigs; 4511 PetscReal *eigs_r,*eigs_c; 4512 PetscInt neigs; 4513 const char *prefix; 4514 4515 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4516 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4517 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4518 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4519 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4520 if (ispreonly) { 4521 check_ksp_type = KSPPREONLY; 4522 compute_eigs = PETSC_FALSE; 4523 } else { 4524 check_ksp_type = KSPGMRES; 4525 compute_eigs = PETSC_TRUE; 4526 } 4527 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4528 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4529 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4530 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4531 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4532 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4533 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4534 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4535 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4536 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4537 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4538 /* create random vec */ 4539 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4540 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4541 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4542 if (CoarseNullSpace) { 4543 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4544 } 4545 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4546 /* solve coarse problem */ 4547 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4548 if (CoarseNullSpace) { 4549 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4550 } 4551 /* set eigenvalue estimation if preonly has not been requested */ 4552 if (compute_eigs) { 4553 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4554 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4555 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4556 lambda_max = eigs_r[neigs-1]; 4557 lambda_min = eigs_r[0]; 4558 if (pcbddc->use_coarse_estimates) { 4559 if (lambda_max>lambda_min) { 4560 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4561 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4562 } 4563 } 4564 } 4565 4566 /* check coarse problem residual error */ 4567 if (pcbddc->dbg_flag) { 4568 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4569 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4570 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4571 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4572 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4573 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4574 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4575 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4576 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4577 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4578 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4579 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4580 if (compute_eigs) { 4581 PetscReal lambda_max_s,lambda_min_s; 4582 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4583 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4584 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4585 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4586 for (i=0;i<neigs;i++) { 4587 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4588 } 4589 } 4590 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4591 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4592 } 4593 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4594 if (compute_eigs) { 4595 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4596 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4597 } 4598 } 4599 } 4600 /* print additional info */ 4601 if (pcbddc->dbg_flag) { 4602 /* waits until all processes reaches this point */ 4603 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4604 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4605 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4606 } 4607 4608 /* free memory */ 4609 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4610 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4611 PetscFunctionReturn(0); 4612 } 4613 4614 #undef __FUNCT__ 4615 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4616 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4617 { 4618 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4619 PC_IS* pcis = (PC_IS*)pc->data; 4620 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4621 IS subset,subset_mult,subset_n; 4622 PetscInt local_size,coarse_size=0; 4623 PetscInt *local_primal_indices=NULL; 4624 const PetscInt *t_local_primal_indices; 4625 PetscErrorCode ierr; 4626 4627 PetscFunctionBegin; 4628 /* Compute global number of coarse dofs */ 4629 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4630 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4631 } 4632 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4633 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4634 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4635 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4636 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4637 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4638 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4639 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4640 if (local_size != pcbddc->local_primal_size) { 4641 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4642 } 4643 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4644 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4645 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4646 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4647 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4648 4649 /* check numbering */ 4650 if (pcbddc->dbg_flag) { 4651 PetscScalar coarsesum,*array; 4652 PetscInt i; 4653 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4654 4655 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4656 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4657 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4658 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4659 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4660 for (i=0;i<pcbddc->local_primal_size;i++) { 4661 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4662 } 4663 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4664 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4665 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4666 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4667 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4668 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4669 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4670 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4671 for (i=0;i<pcis->n;i++) { 4672 if (array[i] == 1.0) { 4673 set_error = PETSC_TRUE; 4674 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4675 } 4676 } 4677 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4678 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4679 for (i=0;i<pcis->n;i++) { 4680 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4681 } 4682 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4683 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4684 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4685 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4686 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4687 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4688 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4689 PetscInt *gidxs; 4690 4691 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4692 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4693 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4694 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4695 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4696 for (i=0;i<pcbddc->local_primal_size;i++) { 4697 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4698 } 4699 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4700 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4701 } 4702 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4703 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4704 } 4705 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4706 /* get back data */ 4707 *coarse_size_n = coarse_size; 4708 *local_primal_indices_n = local_primal_indices; 4709 PetscFunctionReturn(0); 4710 } 4711 4712 #undef __FUNCT__ 4713 #define __FUNCT__ "PCBDDCGlobalToLocal" 4714 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4715 { 4716 IS localis_t; 4717 PetscInt i,lsize,*idxs,n; 4718 PetscScalar *vals; 4719 PetscErrorCode ierr; 4720 4721 PetscFunctionBegin; 4722 /* get indices in local ordering exploiting local to global map */ 4723 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4724 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4725 for (i=0;i<lsize;i++) vals[i] = 1.0; 4726 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4727 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4728 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4729 if (idxs) { /* multilevel guard */ 4730 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4731 } 4732 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4733 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4734 ierr = PetscFree(vals);CHKERRQ(ierr); 4735 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4736 /* now compute set in local ordering */ 4737 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4738 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4739 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4740 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4741 for (i=0,lsize=0;i<n;i++) { 4742 if (PetscRealPart(vals[i]) > 0.5) { 4743 lsize++; 4744 } 4745 } 4746 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4747 for (i=0,lsize=0;i<n;i++) { 4748 if (PetscRealPart(vals[i]) > 0.5) { 4749 idxs[lsize++] = i; 4750 } 4751 } 4752 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4753 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4754 *localis = localis_t; 4755 PetscFunctionReturn(0); 4756 } 4757 4758 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4759 #undef __FUNCT__ 4760 #define __FUNCT__ "PCBDDCMatMult_Private" 4761 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4762 { 4763 PCBDDCChange_ctx change_ctx; 4764 PetscErrorCode ierr; 4765 4766 PetscFunctionBegin; 4767 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4768 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4769 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4770 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4771 PetscFunctionReturn(0); 4772 } 4773 4774 #undef __FUNCT__ 4775 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4776 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4777 { 4778 PCBDDCChange_ctx change_ctx; 4779 PetscErrorCode ierr; 4780 4781 PetscFunctionBegin; 4782 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4783 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4784 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4785 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4786 PetscFunctionReturn(0); 4787 } 4788 4789 #undef __FUNCT__ 4790 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4791 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4792 { 4793 PC_IS *pcis=(PC_IS*)pc->data; 4794 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4795 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4796 Mat S_j; 4797 PetscInt *used_xadj,*used_adjncy; 4798 PetscBool free_used_adj; 4799 PetscErrorCode ierr; 4800 4801 PetscFunctionBegin; 4802 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4803 free_used_adj = PETSC_FALSE; 4804 if (pcbddc->sub_schurs_layers == -1) { 4805 used_xadj = NULL; 4806 used_adjncy = NULL; 4807 } else { 4808 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4809 used_xadj = pcbddc->mat_graph->xadj; 4810 used_adjncy = pcbddc->mat_graph->adjncy; 4811 } else if (pcbddc->computed_rowadj) { 4812 used_xadj = pcbddc->mat_graph->xadj; 4813 used_adjncy = pcbddc->mat_graph->adjncy; 4814 } else { 4815 PetscBool flg_row=PETSC_FALSE; 4816 const PetscInt *xadj,*adjncy; 4817 PetscInt nvtxs; 4818 4819 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4820 if (flg_row) { 4821 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4822 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4823 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4824 free_used_adj = PETSC_TRUE; 4825 } else { 4826 pcbddc->sub_schurs_layers = -1; 4827 used_xadj = NULL; 4828 used_adjncy = NULL; 4829 } 4830 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4831 } 4832 } 4833 4834 /* setup sub_schurs data */ 4835 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4836 if (!sub_schurs->use_mumps) { 4837 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4838 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4839 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4840 } else { 4841 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4842 PetscBool isseqaij; 4843 if (!pcbddc->use_vertices && reuse_solvers) { 4844 PetscInt n_vertices; 4845 4846 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4847 reuse_solvers = (PetscBool)!n_vertices; 4848 } 4849 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4850 if (!isseqaij) { 4851 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4852 if (matis->A == pcbddc->local_mat) { 4853 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4854 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4855 } else { 4856 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4857 } 4858 } 4859 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4860 } 4861 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4862 4863 /* free adjacency */ 4864 if (free_used_adj) { 4865 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4866 } 4867 PetscFunctionReturn(0); 4868 } 4869 4870 #undef __FUNCT__ 4871 #define __FUNCT__ "PCBDDCInitSubSchurs" 4872 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4873 { 4874 PC_IS *pcis=(PC_IS*)pc->data; 4875 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4876 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4877 PCBDDCGraph graph; 4878 PetscErrorCode ierr; 4879 4880 PetscFunctionBegin; 4881 /* attach interface graph for determining subsets */ 4882 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4883 IS verticesIS,verticescomm; 4884 PetscInt vsize,*idxs; 4885 4886 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4887 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4888 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4889 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4890 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4891 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4892 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4893 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4894 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4895 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4896 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4897 /* 4898 if (pcbddc->dbg_flag) { 4899 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4900 } 4901 */ 4902 } else { 4903 graph = pcbddc->mat_graph; 4904 } 4905 4906 /* sub_schurs init */ 4907 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4908 4909 /* free graph struct */ 4910 if (pcbddc->sub_schurs_rebuild) { 4911 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4912 } 4913 PetscFunctionReturn(0); 4914 } 4915