1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 PetscInt subset_size; 51 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 56 /* min/max and threshold */ 57 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 58 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 59 nmax = PetscMax(nmin,nmax); 60 allocated_S_St = PETSC_FALSE; 61 if (nmin) { 62 allocated_S_St = PETSC_TRUE; 63 } 64 65 /* allocate lapack workspace */ 66 cum = cum2 = 0; 67 maxneigs = 0; 68 for (i=0;i<sub_schurs->n_subs;i++) { 69 PetscInt n,subset_size; 70 71 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 72 n = PetscMin(subset_size,nmax); 73 cum += subset_size; 74 cum2 += subset_size*n; 75 maxneigs = PetscMax(maxneigs,n); 76 } 77 if (mss) { 78 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 79 PetscBLASInt B_itype = 1; 80 PetscBLASInt B_N = mss; 81 PetscReal zero = 0.0; 82 PetscReal eps = 0.0; /* dlamch? */ 83 84 B_lwork = -1; 85 S = NULL; 86 St = NULL; 87 eigs = NULL; 88 eigv = NULL; 89 B_iwork = NULL; 90 B_ifail = NULL; 91 #if defined(PETSC_USE_COMPLEX) 92 rwork = NULL; 93 #endif 94 thresh = 1.0; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 112 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 if (allocated_S_St) { 116 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 117 } 118 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 119 #if defined(PETSC_USE_COMPLEX) 120 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 121 #endif 122 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 124 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 125 nv+cum,&pcbddc->adaptive_constraints_idxs, 126 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 127 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 maxneigs = 0; 130 cum = cum2 = cumarray = 0; 131 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 132 pcbddc->adaptive_constraints_data_ptr[0] = 0; 133 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 134 const PetscInt *idxs; 135 136 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 137 for (cum=0;cum<nv;cum++) { 138 pcbddc->adaptive_constraints_n[cum] = 1; 139 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 140 pcbddc->adaptive_constraints_data[cum] = 1.0; 141 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 142 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 143 } 144 cum2 = cum; 145 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 146 } 147 148 if (mss) { /* multilevel */ 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 150 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 151 } 152 153 for (i=0;i<sub_schurs->n_subs;i++) { 154 155 const PetscInt *idxs; 156 PetscReal infty = PETSC_MAX_REAL; 157 PetscInt j,subset_size,eigs_start = 0; 158 PetscBLASInt B_N; 159 PetscBool same_data = PETSC_FALSE; 160 161 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 162 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 163 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 164 if (sub_schurs->is_hermitian) { 165 PetscInt j,k; 166 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 167 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 168 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 169 } 170 for (j=0;j<subset_size;j++) { 171 for (k=j;k<subset_size;k++) { 172 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 173 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 174 } 175 } 176 } else { 177 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 178 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 179 } 180 } else { 181 S = Sarray + cumarray; 182 St = Starray + cumarray; 183 } 184 185 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 186 /* see if we can save some work */ 187 if (sub_schurs->n_subs == 1) { 188 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 189 } 190 191 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 192 B_neigs = 0; 193 } else { 194 /* Threshold: this is an heuristic for edges */ 195 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 196 197 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 198 PetscBLASInt B_itype = 1; 199 PetscBLASInt B_IL, B_IU; 200 PetscReal eps = -1.0; /* dlamch? */ 201 PetscInt nmin_s; 202 203 /* ask for eigenvalues larger than thresh */ 204 if (pcbddc->dbg_flag) { 205 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 206 } 207 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 208 #if defined(PETSC_USE_COMPLEX) 209 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 210 #else 211 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 212 #endif 213 ierr = PetscFPTrapPop();CHKERRQ(ierr); 214 if (B_ierr) { 215 if (B_ierr < 0 ) { 216 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 217 } else if (B_ierr <= B_N) { 218 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 219 } else { 220 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 221 } 222 } 223 224 if (B_neigs > nmax) { 225 if (pcbddc->dbg_flag) { 226 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 227 } 228 eigs_start = B_neigs -nmax; 229 B_neigs = nmax; 230 } 231 232 nmin_s = PetscMin(nmin,B_N); 233 if (B_neigs < nmin_s) { 234 PetscBLASInt B_neigs2; 235 236 B_IU = B_N - B_neigs; 237 B_IL = B_N - nmin_s + 1; 238 if (pcbddc->dbg_flag) { 239 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 240 } 241 if (sub_schurs->is_hermitian) { 242 PetscInt j; 243 for (j=0;j<subset_size;j++) { 244 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 245 } 246 for (j=0;j<subset_size;j++) { 247 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 248 } 249 } else { 250 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 251 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 252 } 253 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 254 #if defined(PETSC_USE_COMPLEX) 255 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 256 #else 257 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 258 #endif 259 ierr = PetscFPTrapPop();CHKERRQ(ierr); 260 B_neigs += B_neigs2; 261 } 262 if (B_ierr) { 263 if (B_ierr < 0 ) { 264 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 265 } else if (B_ierr <= B_N) { 266 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 267 } else { 268 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 269 } 270 } 271 if (pcbddc->dbg_flag) { 272 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 273 for (j=0;j<B_neigs;j++) { 274 if (eigs[j] == 0.0) { 275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 276 } else { 277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 278 } 279 } 280 } 281 } else { 282 /* TODO */ 283 } 284 } 285 maxneigs = PetscMax(B_neigs,maxneigs); 286 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 287 if (B_neigs) { 288 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 289 290 if (pcbddc->dbg_flag > 1) { 291 PetscInt ii; 292 for (ii=0;ii<B_neigs;ii++) { 293 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 294 for (j=0;j<B_N;j++) { 295 #if defined(PETSC_USE_COMPLEX) 296 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 297 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 298 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 299 #else 300 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 301 #endif 302 } 303 } 304 } 305 #if 0 306 for (j=0;j<B_neigs;j++) { 307 PetscBLASInt Blas_N,Blas_one = 1.0; 308 PetscScalar norm; 309 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 310 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 311 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 312 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 313 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 314 } else { 315 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 316 } 317 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 318 } 319 #endif 320 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 321 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 322 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 323 cum++; 324 } 325 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 326 /* shift for next computation */ 327 cumarray += subset_size*subset_size; 328 } 329 if (pcbddc->dbg_flag) { 330 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 331 } 332 333 if (mss) { 334 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 335 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 336 /* destroy matrices (junk) */ 337 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 338 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 339 } 340 if (allocated_S_St) { 341 ierr = PetscFree2(S,St);CHKERRQ(ierr); 342 } 343 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 344 #if defined(PETSC_USE_COMPLEX) 345 ierr = PetscFree(rwork);CHKERRQ(ierr); 346 #endif 347 if (pcbddc->dbg_flag) { 348 PetscInt maxneigs_r; 349 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 350 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 351 } 352 PetscFunctionReturn(0); 353 } 354 355 #undef __FUNCT__ 356 #define __FUNCT__ "PCBDDCSetUpSolvers" 357 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 358 { 359 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 360 PetscScalar *coarse_submat_vals; 361 PetscErrorCode ierr; 362 363 PetscFunctionBegin; 364 /* Setup local scatters R_to_B and (optionally) R_to_D */ 365 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 366 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 367 368 /* Setup local neumann solver ksp_R */ 369 /* PCBDDCSetUpLocalScatters should be called first! */ 370 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 371 372 /* Change global null space passed in by the user if change of basis has been requested */ 373 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 374 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 375 } 376 377 /* 378 Setup local correction and local part of coarse basis. 379 Gives back the dense local part of the coarse matrix in column major ordering 380 */ 381 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 382 383 /* Compute total number of coarse nodes and setup coarse solver */ 384 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 385 386 /* free */ 387 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "PCBDDCResetCustomization" 393 PetscErrorCode PCBDDCResetCustomization(PC pc) 394 { 395 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 396 PetscErrorCode ierr; 397 398 PetscFunctionBegin; 399 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 400 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 401 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 402 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 403 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 404 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 405 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 406 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 407 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 408 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 409 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "PCBDDCResetTopography" 415 PetscErrorCode PCBDDCResetTopography(PC pc) 416 { 417 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 418 PetscErrorCode ierr; 419 420 PetscFunctionBegin; 421 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 422 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 423 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 424 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 425 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "PCBDDCResetSolvers" 431 PetscErrorCode PCBDDCResetSolvers(PC pc) 432 { 433 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 434 PetscScalar *array; 435 PetscErrorCode ierr; 436 437 PetscFunctionBegin; 438 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 439 if (pcbddc->coarse_phi_B) { 440 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 441 ierr = PetscFree(array);CHKERRQ(ierr); 442 } 443 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 444 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 445 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 446 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 447 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 449 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 450 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 451 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 452 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 453 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 454 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 455 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 456 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 457 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 458 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 459 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 460 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 461 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 462 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 463 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 464 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 465 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 466 PetscFunctionReturn(0); 467 } 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 471 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 472 { 473 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 474 PC_IS *pcis = (PC_IS*)pc->data; 475 VecType impVecType; 476 PetscInt n_constraints,n_R,old_size; 477 PetscErrorCode ierr; 478 479 PetscFunctionBegin; 480 if (!pcbddc->ConstraintMatrix) { 481 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 482 } 483 /* get sizes */ 484 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 485 n_R = pcis->n-pcbddc->n_vertices; 486 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 487 /* local work vectors (try to avoid unneeded work)*/ 488 /* R nodes */ 489 old_size = -1; 490 if (pcbddc->vec1_R) { 491 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 492 } 493 if (n_R != old_size) { 494 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 495 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 496 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 497 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 498 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 499 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 500 } 501 /* local primal dofs */ 502 old_size = -1; 503 if (pcbddc->vec1_P) { 504 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 505 } 506 if (pcbddc->local_primal_size != old_size) { 507 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 508 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 509 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 510 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 511 } 512 /* local explicit constraints */ 513 old_size = -1; 514 if (pcbddc->vec1_C) { 515 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 516 } 517 if (n_constraints && n_constraints != old_size) { 518 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 519 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 520 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 521 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 522 } 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "PCBDDCSetUpCorrection" 528 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 529 { 530 PetscErrorCode ierr; 531 /* pointers to pcis and pcbddc */ 532 PC_IS* pcis = (PC_IS*)pc->data; 533 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 534 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 535 /* submatrices of local problem */ 536 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 537 /* submatrices of local coarse problem */ 538 Mat S_VV,S_CV,S_VC,S_CC; 539 /* working matrices */ 540 Mat C_CR; 541 /* additional working stuff */ 542 PC pc_R; 543 Mat F; 544 PetscBool isLU,isCHOL,isILU; 545 546 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 547 PetscScalar *work; 548 PetscInt *idx_V_B; 549 PetscInt n,n_vertices,n_constraints; 550 PetscInt i,n_R,n_D,n_B; 551 PetscBool unsymmetric_check; 552 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 553 MatType impMatType; 554 /* some shortcuts to scalars */ 555 PetscScalar one=1.0,m_one=-1.0; 556 557 PetscFunctionBegin; 558 n_vertices = pcbddc->n_vertices; 559 n_constraints = pcbddc->local_primal_size-n_vertices; 560 /* Set Non-overlapping dimensions */ 561 n_B = pcis->n_B; 562 n_D = pcis->n - n_B; 563 n_R = pcis->n - n_vertices; 564 565 /* Set types for local objects needed by BDDC precondtioner */ 566 impMatType = MATSEQDENSE; 567 568 /* vertices in boundary numbering */ 569 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 570 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 571 if (i != n_vertices) { 572 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 573 } 574 575 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 576 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 577 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 578 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 579 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 580 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 581 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 582 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 583 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 584 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 585 586 unsymmetric_check = PETSC_FALSE; 587 /* allocate workspace */ 588 n = 0; 589 if (n_constraints) { 590 n += n_R*n_constraints; 591 } 592 if (n_vertices) { 593 n = PetscMax(2*n_R*n_vertices,n); 594 n = PetscMax((n_R+n_B)*n_vertices,n); 595 } 596 if (!pcbddc->symmetric_primal) { 597 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 598 unsymmetric_check = PETSC_TRUE; 599 } 600 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 601 602 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 603 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 604 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 605 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 606 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 607 if (isLU || isILU || isCHOL) { 608 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 609 } else if (sub_schurs->reuse_mumps) { 610 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 611 MatFactorType type; 612 613 F = reuse_mumps->F; 614 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 615 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 616 } else { 617 F = NULL; 618 } 619 620 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 621 if (n_constraints) { 622 Mat M1,M2,M3; 623 Mat auxmat; 624 IS is_aux; 625 PetscScalar *array,*array2; 626 627 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 628 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 629 630 /* Extract constraints on R nodes: C_{CR} */ 631 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 632 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 633 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 634 635 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 636 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 637 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 638 for (i=0;i<n_constraints;i++) { 639 const PetscScalar *row_cmat_values; 640 const PetscInt *row_cmat_indices; 641 PetscInt size_of_constraint,j; 642 643 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 644 for (j=0;j<size_of_constraint;j++) { 645 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 646 } 647 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 648 } 649 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 650 if (F) { 651 Mat B; 652 653 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 654 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 655 ierr = MatDestroy(&B);CHKERRQ(ierr); 656 } else { 657 PetscScalar *marr; 658 659 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 660 for (i=0;i<n_constraints;i++) { 661 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 662 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 663 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 664 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 665 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 666 } 667 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 668 } 669 if (!pcbddc->switch_static) { 670 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 671 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 672 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 673 for (i=0;i<n_constraints;i++) { 674 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 675 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 676 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 677 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 678 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 679 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 680 } 681 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 682 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 683 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 684 } else { 685 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 686 pcbddc->local_auxmat2 = local_auxmat2_R; 687 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 688 } 689 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 690 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 691 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 692 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 693 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 694 if (isCHOL) { 695 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 696 } else { 697 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 698 } 699 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 700 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 701 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 702 ierr = MatDestroy(&M2);CHKERRQ(ierr); 703 ierr = MatDestroy(&M3);CHKERRQ(ierr); 704 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 705 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 706 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 707 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 708 ierr = MatDestroy(&M1);CHKERRQ(ierr); 709 } 710 /* Get submatrices from subdomain matrix */ 711 if (n_vertices) { 712 IS is_aux; 713 714 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 715 IS tis; 716 717 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 718 ierr = ISSort(tis);CHKERRQ(ierr); 719 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 720 ierr = ISDestroy(&tis);CHKERRQ(ierr); 721 } else { 722 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 723 } 724 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 725 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 726 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 727 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 728 } 729 730 /* Matrix of coarse basis functions (local) */ 731 if (pcbddc->coarse_phi_B) { 732 PetscInt on_B,on_primal,on_D=n_D; 733 if (pcbddc->coarse_phi_D) { 734 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 735 } 736 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 737 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 738 PetscScalar *marray; 739 740 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 741 ierr = PetscFree(marray);CHKERRQ(ierr); 742 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 743 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 744 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 745 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 746 } 747 } 748 749 if (!pcbddc->coarse_phi_B) { 750 PetscScalar *marray; 751 752 n = n_B*pcbddc->local_primal_size; 753 if (pcbddc->switch_static || pcbddc->dbg_flag) { 754 n += n_D*pcbddc->local_primal_size; 755 } 756 if (!pcbddc->symmetric_primal) { 757 n *= 2; 758 } 759 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 760 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 761 n = n_B*pcbddc->local_primal_size; 762 if (pcbddc->switch_static || pcbddc->dbg_flag) { 763 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 764 n += n_D*pcbddc->local_primal_size; 765 } 766 if (!pcbddc->symmetric_primal) { 767 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 768 if (pcbddc->switch_static || pcbddc->dbg_flag) { 769 n = n_B*pcbddc->local_primal_size; 770 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 771 } 772 } else { 773 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 774 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 775 if (pcbddc->switch_static || pcbddc->dbg_flag) { 776 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 777 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 778 } 779 } 780 } 781 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 782 /* vertices */ 783 if (n_vertices) { 784 785 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 786 787 if (n_R) { 788 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 789 PetscBLASInt B_N,B_one = 1; 790 PetscScalar *x,*y; 791 PetscBool isseqaij; 792 793 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 794 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 795 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 796 if (F) { /* TODO could be optimized for symmetric problems */ 797 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 798 } else { 799 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 800 for (i=0;i<n_vertices;i++) { 801 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 802 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 803 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 804 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 805 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 806 } 807 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 808 } 809 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 810 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 811 if (n_constraints) { 812 Mat B; 813 814 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 815 for (i=0;i<n_vertices;i++) { 816 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 817 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 818 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 819 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 820 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 821 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 822 } 823 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 824 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 825 ierr = MatDestroy(&B);CHKERRQ(ierr); 826 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 827 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 828 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 829 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 830 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 831 ierr = MatDestroy(&B);CHKERRQ(ierr); 832 } 833 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 834 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 835 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 836 } 837 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 838 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 839 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 840 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 841 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 842 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 843 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 844 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 845 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 846 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 847 } else { 848 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 849 } 850 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 851 /* coarse basis functions */ 852 for (i=0;i<n_vertices;i++) { 853 PetscScalar *y; 854 855 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 856 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 857 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 858 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 y[n_B*i+idx_V_B[i]] = 1.0; 861 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 862 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 863 864 if (pcbddc->switch_static || pcbddc->dbg_flag) { 865 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 866 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 867 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 870 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 871 } 872 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 873 } 874 /* if n_R == 0 the object is not destroyed */ 875 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 876 } 877 878 if (n_constraints) { 879 Mat B; 880 881 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 882 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 883 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 884 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 885 if (n_vertices) { 886 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 887 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 888 } else { 889 Mat S_VCt; 890 891 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 892 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 893 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 894 } 895 } 896 ierr = MatDestroy(&B);CHKERRQ(ierr); 897 /* coarse basis functions */ 898 for (i=0;i<n_constraints;i++) { 899 PetscScalar *y; 900 901 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 902 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 903 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 904 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 906 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 907 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 908 if (pcbddc->switch_static || pcbddc->dbg_flag) { 909 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 910 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 911 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 912 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 913 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 914 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 915 } 916 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 917 } 918 } 919 if (n_constraints) { 920 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 921 } 922 923 /* compute other basis functions for non-symmetric problems */ 924 if (!pcbddc->symmetric_primal) { 925 926 if (n_constraints) { 927 Mat S_CCT,B_C; 928 929 /* this is a lazy thing */ 930 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 931 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 932 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 933 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 934 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 935 if (n_vertices) { 936 Mat B_V,S_VCT; 937 938 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 939 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 940 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 941 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 942 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 943 } 944 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 945 } else { /* if there are no constraints, reset work */ 946 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 947 } 948 if (n_vertices && n_R) { 949 Mat A_VRT; 950 PetscScalar *marray; 951 PetscBLASInt B_N,B_one = 1; 952 953 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 954 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 955 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 956 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 957 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 958 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 959 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 960 } 961 962 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 963 for (i=0;i<pcbddc->local_primal_size;i++) { 964 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 965 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 966 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 967 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 968 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 969 } 970 } else { 971 for (i=0;i<pcbddc->local_primal_size;i++) { 972 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 973 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 974 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 975 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 976 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 977 } 978 } 979 /* coarse basis functions */ 980 for (i=0;i<pcbddc->local_primal_size;i++) { 981 PetscScalar *y; 982 983 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 984 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 985 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 986 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 987 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 988 if (i<n_vertices) { 989 y[n_B*i+idx_V_B[i]] = 1.0; 990 } 991 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 992 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 993 994 if (pcbddc->switch_static || pcbddc->dbg_flag) { 995 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 996 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 997 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 998 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 999 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1000 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1001 } 1002 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1003 } 1004 } 1005 /* free memory */ 1006 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1007 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1008 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1009 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1010 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1011 ierr = PetscFree(work);CHKERRQ(ierr); 1012 if (n_vertices) { 1013 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1014 } 1015 if (n_constraints) { 1016 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1017 } 1018 /* Checking coarse_sub_mat and coarse basis functios */ 1019 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1020 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1021 if (pcbddc->dbg_flag) { 1022 Mat coarse_sub_mat; 1023 Mat AUXMAT,TM1,TM2,TM3,TM4; 1024 Mat coarse_phi_D,coarse_phi_B; 1025 Mat coarse_psi_D,coarse_psi_B; 1026 Mat A_II,A_BB,A_IB,A_BI; 1027 Mat C_B,CPHI; 1028 IS is_dummy; 1029 Vec mones; 1030 MatType checkmattype=MATSEQAIJ; 1031 PetscReal real_value; 1032 1033 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1034 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1035 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1036 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1037 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1038 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1039 if (unsymmetric_check) { 1040 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1041 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1042 } 1043 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1044 1045 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1046 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1047 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1048 if (unsymmetric_check) { 1049 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1050 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1051 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1052 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1053 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1054 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1055 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1056 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1057 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1058 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1059 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1060 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1061 } else { 1062 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1063 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1064 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1065 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1066 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1067 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1068 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1069 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1070 } 1071 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1072 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1073 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1074 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1075 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1076 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1077 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1078 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1079 1080 /* check constraints */ 1081 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1082 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1083 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1084 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1085 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1086 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1087 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1088 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1089 if (unsymmetric_check) { 1090 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1091 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1092 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1093 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1094 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1095 } 1096 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1097 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1098 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1099 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1100 1101 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1102 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1103 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1104 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1105 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1106 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1107 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1108 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1109 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1110 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1111 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1112 if (unsymmetric_check) { 1113 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1114 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1115 } 1116 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1117 } 1118 /* get back data */ 1119 *coarse_submat_vals_n = coarse_submat_vals; 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1125 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1126 { 1127 Mat *work_mat; 1128 IS isrow_s,iscol_s; 1129 PetscBool rsorted,csorted; 1130 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1131 PetscErrorCode ierr; 1132 1133 PetscFunctionBegin; 1134 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1135 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1136 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1137 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1138 1139 if (!rsorted) { 1140 const PetscInt *idxs; 1141 PetscInt *idxs_sorted,i; 1142 1143 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1144 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1145 for (i=0;i<rsize;i++) { 1146 idxs_perm_r[i] = i; 1147 } 1148 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1149 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1150 for (i=0;i<rsize;i++) { 1151 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1152 } 1153 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1154 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1155 } else { 1156 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1157 isrow_s = isrow; 1158 } 1159 1160 if (!csorted) { 1161 if (isrow == iscol) { 1162 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1163 iscol_s = isrow_s; 1164 } else { 1165 const PetscInt *idxs; 1166 PetscInt *idxs_sorted,i; 1167 1168 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1169 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1170 for (i=0;i<csize;i++) { 1171 idxs_perm_c[i] = i; 1172 } 1173 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1174 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1175 for (i=0;i<csize;i++) { 1176 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1177 } 1178 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1179 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1180 } 1181 } else { 1182 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1183 iscol_s = iscol; 1184 } 1185 1186 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1187 1188 if (!rsorted || !csorted) { 1189 Mat new_mat; 1190 IS is_perm_r,is_perm_c; 1191 1192 if (!rsorted) { 1193 PetscInt *idxs_r,i; 1194 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1195 for (i=0;i<rsize;i++) { 1196 idxs_r[idxs_perm_r[i]] = i; 1197 } 1198 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1199 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1200 } else { 1201 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1202 } 1203 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1204 1205 if (!csorted) { 1206 if (isrow_s == iscol_s) { 1207 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1208 is_perm_c = is_perm_r; 1209 } else { 1210 PetscInt *idxs_c,i; 1211 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1212 for (i=0;i<csize;i++) { 1213 idxs_c[idxs_perm_c[i]] = i; 1214 } 1215 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1216 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1217 } 1218 } else { 1219 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1220 } 1221 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1222 1223 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1224 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1225 work_mat[0] = new_mat; 1226 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1227 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1228 } 1229 1230 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1231 *B = work_mat[0]; 1232 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1233 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1234 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 #undef __FUNCT__ 1239 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1240 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1241 { 1242 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1243 PC_IS* pcis = (PC_IS*)pc->data; 1244 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1245 Mat new_mat; 1246 IS is_local,is_global; 1247 PetscInt local_size; 1248 PetscBool isseqaij; 1249 PetscErrorCode ierr; 1250 1251 PetscFunctionBegin; 1252 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1253 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1254 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1255 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr); 1256 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1257 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1258 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1259 1260 /* check */ 1261 if (pcbddc->dbg_flag) { 1262 Vec x,x_change; 1263 PetscReal error; 1264 1265 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1266 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1267 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1268 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1270 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1271 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1272 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1273 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1274 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1275 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1276 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1277 ierr = VecDestroy(&x);CHKERRQ(ierr); 1278 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1279 } 1280 1281 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1282 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1283 if (isseqaij) { 1284 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1285 } else { 1286 Mat work_mat; 1287 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1288 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1289 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1290 } 1291 if (matis->A->symmetric_set) { 1292 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1293 #if !defined(PETSC_USE_COMPLEX) 1294 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1295 #endif 1296 } 1297 /* 1298 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1299 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1300 */ 1301 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1307 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1308 { 1309 PC_IS* pcis = (PC_IS*)(pc->data); 1310 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1311 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1312 PetscInt *idx_R_local=NULL; 1313 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1314 PetscInt vbs,bs; 1315 PetscBT bitmask=NULL; 1316 PetscErrorCode ierr; 1317 1318 PetscFunctionBegin; 1319 /* 1320 No need to setup local scatters if 1321 - primal space is unchanged 1322 AND 1323 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1324 AND 1325 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1326 */ 1327 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1328 PetscFunctionReturn(0); 1329 } 1330 /* destroy old objects */ 1331 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1332 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1333 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1334 /* Set Non-overlapping dimensions */ 1335 n_B = pcis->n_B; 1336 n_D = pcis->n - n_B; 1337 n_vertices = pcbddc->n_vertices; 1338 1339 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1340 1341 /* create auxiliary bitmask and allocate workspace */ 1342 if (!sub_schurs->reuse_mumps) { 1343 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1344 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1345 for (i=0;i<n_vertices;i++) { 1346 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1347 } 1348 1349 for (i=0, n_R=0; i<pcis->n; i++) { 1350 if (!PetscBTLookup(bitmask,i)) { 1351 idx_R_local[n_R++] = i; 1352 } 1353 } 1354 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1355 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1356 1357 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1358 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1359 } 1360 1361 /* Block code */ 1362 vbs = 1; 1363 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1364 if (bs>1 && !(n_vertices%bs)) { 1365 PetscBool is_blocked = PETSC_TRUE; 1366 PetscInt *vary; 1367 if (!sub_schurs->reuse_mumps) { 1368 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1369 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1370 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1371 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1372 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1373 for (i=0; i<pcis->n/bs; i++) { 1374 if (vary[i]!=0 && vary[i]!=bs) { 1375 is_blocked = PETSC_FALSE; 1376 break; 1377 } 1378 } 1379 ierr = PetscFree(vary);CHKERRQ(ierr); 1380 } else { 1381 /* Verify directly the R set */ 1382 for (i=0; i<n_R/bs; i++) { 1383 PetscInt j,node=idx_R_local[bs*i]; 1384 for (j=1; j<bs; j++) { 1385 if (node != idx_R_local[bs*i+j]-j) { 1386 is_blocked = PETSC_FALSE; 1387 break; 1388 } 1389 } 1390 } 1391 } 1392 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1393 vbs = bs; 1394 for (i=0;i<n_R/vbs;i++) { 1395 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1396 } 1397 } 1398 } 1399 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1400 if (sub_schurs->reuse_mumps) { 1401 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1402 1403 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1404 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1405 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1406 reuse_mumps->is_R = pcbddc->is_R_local; 1407 } else { 1408 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1409 } 1410 1411 /* print some info if requested */ 1412 if (pcbddc->dbg_flag) { 1413 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1414 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1415 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1416 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1418 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1419 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1420 } 1421 1422 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1423 if (!sub_schurs->reuse_mumps) { 1424 IS is_aux1,is_aux2; 1425 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1426 1427 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1428 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1429 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1430 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1431 for (i=0; i<n_D; i++) { 1432 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1433 } 1434 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1435 for (i=0, j=0; i<n_R; i++) { 1436 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1437 aux_array1[j++] = i; 1438 } 1439 } 1440 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1441 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1442 for (i=0, j=0; i<n_B; i++) { 1443 if (!PetscBTLookup(bitmask,is_indices[i])) { 1444 aux_array2[j++] = i; 1445 } 1446 } 1447 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1448 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1449 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1450 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1451 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1452 1453 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1454 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1455 for (i=0, j=0; i<n_R; i++) { 1456 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1457 aux_array1[j++] = i; 1458 } 1459 } 1460 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1461 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1462 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1463 } 1464 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1465 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1466 } else { 1467 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1468 IS tis; 1469 PetscInt schur_size; 1470 1471 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1472 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1473 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1474 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1475 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1476 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1477 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1478 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1479 } 1480 } 1481 PetscFunctionReturn(0); 1482 } 1483 1484 1485 #undef __FUNCT__ 1486 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1487 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1488 { 1489 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1490 PC_IS *pcis = (PC_IS*)pc->data; 1491 PC pc_temp; 1492 Mat A_RR; 1493 MatReuse reuse; 1494 PetscScalar m_one = -1.0; 1495 PetscReal value; 1496 PetscInt n_D,n_R; 1497 PetscBool use_exact,use_exact_reduced,issbaij; 1498 PetscErrorCode ierr; 1499 /* prefixes stuff */ 1500 char dir_prefix[256],neu_prefix[256],str_level[16]; 1501 size_t len; 1502 1503 PetscFunctionBegin; 1504 1505 /* compute prefixes */ 1506 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1507 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1508 if (!pcbddc->current_level) { 1509 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1510 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1511 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1512 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1513 } else { 1514 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1515 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1516 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1517 len -= 15; /* remove "pc_bddc_coarse_" */ 1518 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1519 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1520 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1521 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1522 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1523 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1524 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1525 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1526 } 1527 1528 /* DIRICHLET PROBLEM */ 1529 if (dirichlet) { 1530 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1531 if (pcbddc->local_mat->symmetric_set) { 1532 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1533 } 1534 /* Matrix for Dirichlet problem is pcis->A_II */ 1535 n_D = pcis->n - pcis->n_B; 1536 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1537 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1538 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1539 /* default */ 1540 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1541 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1542 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1543 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1544 if (issbaij) { 1545 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1546 } else { 1547 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1548 } 1549 /* Allow user's customization */ 1550 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1551 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1552 } 1553 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1554 if (sub_schurs->reuse_mumps) { 1555 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1556 1557 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1558 } 1559 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1560 if (!n_D) { 1561 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1562 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1563 } 1564 /* Set Up KSP for Dirichlet problem of BDDC */ 1565 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1566 /* set ksp_D into pcis data */ 1567 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1568 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1569 pcis->ksp_D = pcbddc->ksp_D; 1570 } 1571 1572 /* NEUMANN PROBLEM */ 1573 A_RR = 0; 1574 if (neumann) { 1575 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1576 PetscInt ibs,mbs; 1577 PetscBool issbaij; 1578 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1579 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1580 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1581 if (pcbddc->ksp_R) { /* already created ksp */ 1582 PetscInt nn_R; 1583 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1584 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1585 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1586 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1587 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1588 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1589 reuse = MAT_INITIAL_MATRIX; 1590 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1591 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1592 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1593 reuse = MAT_INITIAL_MATRIX; 1594 } else { /* safe to reuse the matrix */ 1595 reuse = MAT_REUSE_MATRIX; 1596 } 1597 } 1598 /* last check */ 1599 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1600 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1601 reuse = MAT_INITIAL_MATRIX; 1602 } 1603 } else { /* first time, so we need to create the matrix */ 1604 reuse = MAT_INITIAL_MATRIX; 1605 } 1606 /* extract A_RR */ 1607 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1608 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1609 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1610 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1611 if (matis->A == pcbddc->local_mat) { 1612 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1613 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1614 } else { 1615 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1616 } 1617 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1618 if (matis->A == pcbddc->local_mat) { 1619 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1620 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1621 } else { 1622 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1623 } 1624 } 1625 if (!sub_schurs->reuse_mumps) { 1626 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1627 if (pcbddc->local_mat->symmetric_set) { 1628 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1629 } 1630 } else { 1631 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1632 1633 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1634 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1635 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1636 } 1637 if (!pcbddc->ksp_R) { /* create object if not present */ 1638 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1639 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1640 /* default */ 1641 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1642 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1643 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1644 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1645 if (issbaij) { 1646 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1647 } else { 1648 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1649 } 1650 /* Allow user's customization */ 1651 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1652 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1653 } 1654 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1655 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1656 if (!n_R) { 1657 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1658 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1659 } 1660 /* Reuse MUMPS solver if it is present */ 1661 if (sub_schurs->reuse_mumps) { 1662 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1663 1664 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1665 } 1666 /* Set Up KSP for Neumann problem of BDDC */ 1667 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1668 } 1669 /* free Neumann problem's matrix */ 1670 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1671 1672 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1673 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1674 if (pcbddc->dbg_flag) { 1675 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1676 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1677 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1678 } 1679 if (dirichlet) { /* Dirichlet */ 1680 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1681 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1682 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1683 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1684 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1685 /* need to be adapted? */ 1686 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1687 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1688 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1689 /* print info */ 1690 if (pcbddc->dbg_flag) { 1691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1692 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1693 } 1694 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1695 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1696 } 1697 } 1698 if (neumann) { /* Neumann */ 1699 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1700 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1701 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1702 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1703 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1704 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1705 /* need to be adapted? */ 1706 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1707 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1708 /* print info */ 1709 if (pcbddc->dbg_flag) { 1710 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1711 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1712 } 1713 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1714 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1715 } 1716 } 1717 } 1718 PetscFunctionReturn(0); 1719 } 1720 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1723 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1724 { 1725 PetscErrorCode ierr; 1726 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1727 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1728 1729 PetscFunctionBegin; 1730 if (!sub_schurs->reuse_mumps) { 1731 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1732 } 1733 if (!pcbddc->switch_static) { 1734 if (applytranspose && pcbddc->local_auxmat1) { 1735 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1736 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1737 } 1738 if (!sub_schurs->reuse_mumps) { 1739 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1740 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1741 } else { 1742 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1743 1744 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1745 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1746 } 1747 } else { 1748 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1749 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1750 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1751 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1752 if (applytranspose && pcbddc->local_auxmat1) { 1753 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1754 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1755 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1756 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1757 } 1758 } 1759 if (!sub_schurs->reuse_mumps) { 1760 if (applytranspose) { 1761 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1762 } else { 1763 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1764 } 1765 #if defined(PETSC_HAVE_MUMPS) 1766 } else { 1767 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1768 1769 if (applytranspose) { 1770 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1771 } else { 1772 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1773 } 1774 #endif 1775 } 1776 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1777 if (!pcbddc->switch_static) { 1778 if (!sub_schurs->reuse_mumps) { 1779 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1780 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1781 } else { 1782 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1783 1784 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1785 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1786 } 1787 if (!applytranspose && pcbddc->local_auxmat1) { 1788 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1789 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1790 } 1791 } else { 1792 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1793 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1794 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1795 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1796 if (!applytranspose && pcbddc->local_auxmat1) { 1797 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1798 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1799 } 1800 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1801 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1802 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1803 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1804 } 1805 PetscFunctionReturn(0); 1806 } 1807 1808 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1811 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1812 { 1813 PetscErrorCode ierr; 1814 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1815 PC_IS* pcis = (PC_IS*) (pc->data); 1816 const PetscScalar zero = 0.0; 1817 1818 PetscFunctionBegin; 1819 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1820 if (applytranspose) { 1821 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1822 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1823 } else { 1824 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1825 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1826 } 1827 /* start communications from local primal nodes to rhs of coarse solver */ 1828 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1829 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1830 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1831 1832 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1833 /* TODO remove null space when doing multilevel */ 1834 if (pcbddc->coarse_ksp) { 1835 Vec rhs,sol; 1836 1837 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1838 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1839 if (applytranspose) { 1840 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1841 } else { 1842 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1843 } 1844 } 1845 1846 /* Local solution on R nodes */ 1847 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1848 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1849 } 1850 1851 /* communications from coarse sol to local primal nodes */ 1852 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1853 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1854 1855 /* Sum contributions from two levels */ 1856 if (applytranspose) { 1857 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1858 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1859 } else { 1860 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1861 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1862 } 1863 PetscFunctionReturn(0); 1864 } 1865 1866 #undef __FUNCT__ 1867 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1868 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1869 { 1870 PetscErrorCode ierr; 1871 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1872 PetscScalar *array; 1873 Vec from,to; 1874 1875 PetscFunctionBegin; 1876 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1877 from = pcbddc->coarse_vec; 1878 to = pcbddc->vec1_P; 1879 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1880 Vec tvec; 1881 1882 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1883 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1884 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1885 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1886 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1887 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1888 } 1889 } else { /* from local to global -> put data in coarse right hand side */ 1890 from = pcbddc->vec1_P; 1891 to = pcbddc->coarse_vec; 1892 } 1893 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1894 PetscFunctionReturn(0); 1895 } 1896 1897 #undef __FUNCT__ 1898 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1899 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1900 { 1901 PetscErrorCode ierr; 1902 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1903 PetscScalar *array; 1904 Vec from,to; 1905 1906 PetscFunctionBegin; 1907 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1908 from = pcbddc->coarse_vec; 1909 to = pcbddc->vec1_P; 1910 } else { /* from local to global -> put data in coarse right hand side */ 1911 from = pcbddc->vec1_P; 1912 to = pcbddc->coarse_vec; 1913 } 1914 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1915 if (smode == SCATTER_FORWARD) { 1916 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1917 Vec tvec; 1918 1919 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1920 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1921 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1922 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1923 } 1924 } else { 1925 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1926 ierr = VecResetArray(from);CHKERRQ(ierr); 1927 } 1928 } 1929 PetscFunctionReturn(0); 1930 } 1931 1932 /* uncomment for testing purposes */ 1933 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1934 #undef __FUNCT__ 1935 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1936 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1937 { 1938 PetscErrorCode ierr; 1939 PC_IS* pcis = (PC_IS*)(pc->data); 1940 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1941 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1942 /* one and zero */ 1943 PetscScalar one=1.0,zero=0.0; 1944 /* space to store constraints and their local indices */ 1945 PetscScalar *constraints_data; 1946 PetscInt *constraints_idxs,*constraints_idxs_B; 1947 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1948 PetscInt *constraints_n; 1949 /* iterators */ 1950 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1951 /* BLAS integers */ 1952 PetscBLASInt lwork,lierr; 1953 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1954 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1955 /* reuse */ 1956 PetscInt olocal_primal_size,olocal_primal_size_cc; 1957 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1958 /* change of basis */ 1959 PetscBool qr_needed; 1960 PetscBT change_basis,qr_needed_idx; 1961 /* auxiliary stuff */ 1962 PetscInt *nnz,*is_indices; 1963 PetscInt ncc; 1964 /* some quantities */ 1965 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1966 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1967 1968 PetscFunctionBegin; 1969 /* Destroy Mat objects computed previously */ 1970 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1971 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1972 /* save info on constraints from previous setup (if any) */ 1973 olocal_primal_size = pcbddc->local_primal_size; 1974 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1975 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1976 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1977 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1978 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1979 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1980 1981 /* print some info */ 1982 if (pcbddc->dbg_flag) { 1983 IS vertices; 1984 PetscInt nv,nedges,nfaces; 1985 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1986 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1987 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1988 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1989 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1990 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1991 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1992 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1993 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1994 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1995 } 1996 1997 if (!pcbddc->adaptive_selection) { 1998 IS ISForVertices,*ISForFaces,*ISForEdges; 1999 MatNullSpace nearnullsp; 2000 const Vec *nearnullvecs; 2001 Vec *localnearnullsp; 2002 PetscScalar *array; 2003 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2004 PetscBool nnsp_has_cnst; 2005 /* LAPACK working arrays for SVD or POD */ 2006 PetscBool skip_lapack,boolforchange; 2007 PetscScalar *work; 2008 PetscReal *singular_vals; 2009 #if defined(PETSC_USE_COMPLEX) 2010 PetscReal *rwork; 2011 #endif 2012 #if defined(PETSC_MISSING_LAPACK_GESVD) 2013 PetscScalar *temp_basis,*correlation_mat; 2014 #else 2015 PetscBLASInt dummy_int=1; 2016 PetscScalar dummy_scalar=1.; 2017 #endif 2018 2019 /* Get index sets for faces, edges and vertices from graph */ 2020 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2021 /* free unneeded index sets */ 2022 if (!pcbddc->use_vertices) { 2023 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2024 } 2025 if (!pcbddc->use_edges) { 2026 for (i=0;i<n_ISForEdges;i++) { 2027 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2028 } 2029 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2030 n_ISForEdges = 0; 2031 } 2032 if (!pcbddc->use_faces) { 2033 for (i=0;i<n_ISForFaces;i++) { 2034 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2035 } 2036 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2037 n_ISForFaces = 0; 2038 } 2039 2040 #if defined(PETSC_USE_DEBUG) 2041 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2042 Also use_change_of_basis should be consistent among processors */ 2043 if (pcbddc->NullSpace) { 2044 PetscBool tbool[2],gbool[2]; 2045 2046 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2047 pcbddc->use_change_of_basis = PETSC_TRUE; 2048 if (!ISForEdges) { 2049 pcbddc->use_change_on_faces = PETSC_TRUE; 2050 } 2051 } 2052 tbool[0] = pcbddc->use_change_of_basis; 2053 tbool[1] = pcbddc->use_change_on_faces; 2054 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2055 pcbddc->use_change_of_basis = gbool[0]; 2056 pcbddc->use_change_on_faces = gbool[1]; 2057 } 2058 #endif 2059 2060 /* check if near null space is attached to global mat */ 2061 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2062 if (nearnullsp) { 2063 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2064 /* remove any stored info */ 2065 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2066 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2067 /* store information for BDDC solver reuse */ 2068 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2069 pcbddc->onearnullspace = nearnullsp; 2070 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2071 for (i=0;i<nnsp_size;i++) { 2072 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2073 } 2074 } else { /* if near null space is not provided BDDC uses constants by default */ 2075 nnsp_size = 0; 2076 nnsp_has_cnst = PETSC_TRUE; 2077 } 2078 /* get max number of constraints on a single cc */ 2079 max_constraints = nnsp_size; 2080 if (nnsp_has_cnst) max_constraints++; 2081 2082 /* 2083 Evaluate maximum storage size needed by the procedure 2084 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2085 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2086 There can be multiple constraints per connected component 2087 */ 2088 n_vertices = 0; 2089 if (ISForVertices) { 2090 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2091 } 2092 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2093 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2094 2095 total_counts = n_ISForFaces+n_ISForEdges; 2096 total_counts *= max_constraints; 2097 total_counts += n_vertices; 2098 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2099 2100 total_counts = 0; 2101 max_size_of_constraint = 0; 2102 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2103 IS used_is; 2104 if (i<n_ISForEdges) { 2105 used_is = ISForEdges[i]; 2106 } else { 2107 used_is = ISForFaces[i-n_ISForEdges]; 2108 } 2109 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2110 total_counts += j; 2111 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2112 } 2113 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2114 2115 /* get local part of global near null space vectors */ 2116 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2117 for (k=0;k<nnsp_size;k++) { 2118 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2119 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2120 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2121 } 2122 2123 /* whether or not to skip lapack calls */ 2124 skip_lapack = PETSC_TRUE; 2125 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2126 2127 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2128 if (!skip_lapack) { 2129 PetscScalar temp_work; 2130 2131 #if defined(PETSC_MISSING_LAPACK_GESVD) 2132 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2133 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2134 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2135 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2136 #if defined(PETSC_USE_COMPLEX) 2137 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2138 #endif 2139 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2140 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2141 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2142 lwork = -1; 2143 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2144 #if !defined(PETSC_USE_COMPLEX) 2145 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2146 #else 2147 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2148 #endif 2149 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2150 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2151 #else /* on missing GESVD */ 2152 /* SVD */ 2153 PetscInt max_n,min_n; 2154 max_n = max_size_of_constraint; 2155 min_n = max_constraints; 2156 if (max_size_of_constraint < max_constraints) { 2157 min_n = max_size_of_constraint; 2158 max_n = max_constraints; 2159 } 2160 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2161 #if defined(PETSC_USE_COMPLEX) 2162 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2163 #endif 2164 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2165 lwork = -1; 2166 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2167 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2168 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2169 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2170 #if !defined(PETSC_USE_COMPLEX) 2171 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2172 #else 2173 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2174 #endif 2175 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2176 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2177 #endif /* on missing GESVD */ 2178 /* Allocate optimal workspace */ 2179 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2180 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2181 } 2182 /* Now we can loop on constraining sets */ 2183 total_counts = 0; 2184 constraints_idxs_ptr[0] = 0; 2185 constraints_data_ptr[0] = 0; 2186 /* vertices */ 2187 if (n_vertices) { 2188 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2189 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2190 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2191 for (i=0;i<n_vertices;i++) { 2192 constraints_n[total_counts] = 1; 2193 constraints_data[total_counts] = 1.0; 2194 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2195 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2196 total_counts++; 2197 } 2198 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2199 PetscBool used_vertex; 2200 for (i=0;i<n_vertices;i++) { 2201 used_vertex = PETSC_FALSE; 2202 k = 0; 2203 while (!used_vertex && k<nnsp_size) { 2204 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2205 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2206 constraints_n[total_counts] = 1; 2207 constraints_idxs[total_counts] = is_indices[i]; 2208 constraints_data[total_counts] = 1.0; 2209 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2210 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2211 total_counts++; 2212 used_vertex = PETSC_TRUE; 2213 } 2214 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2215 k++; 2216 } 2217 } 2218 } 2219 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2220 n_vertices = total_counts; 2221 } 2222 2223 /* edges and faces */ 2224 total_counts_cc = total_counts; 2225 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2226 IS used_is; 2227 PetscBool idxs_copied = PETSC_FALSE; 2228 2229 if (ncc<n_ISForEdges) { 2230 used_is = ISForEdges[ncc]; 2231 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2232 } else { 2233 used_is = ISForFaces[ncc-n_ISForEdges]; 2234 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2235 } 2236 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2237 2238 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2239 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2240 /* change of basis should not be performed on local periodic nodes */ 2241 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2242 if (nnsp_has_cnst) { 2243 PetscScalar quad_value; 2244 2245 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2246 idxs_copied = PETSC_TRUE; 2247 2248 if (!pcbddc->use_nnsp_true) { 2249 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2250 } else { 2251 quad_value = 1.0; 2252 } 2253 for (j=0;j<size_of_constraint;j++) { 2254 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2255 } 2256 temp_constraints++; 2257 total_counts++; 2258 } 2259 for (k=0;k<nnsp_size;k++) { 2260 PetscReal real_value; 2261 PetscScalar *ptr_to_data; 2262 2263 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2264 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2265 for (j=0;j<size_of_constraint;j++) { 2266 ptr_to_data[j] = array[is_indices[j]]; 2267 } 2268 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2269 /* check if array is null on the connected component */ 2270 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2271 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2272 if (real_value > 0.0) { /* keep indices and values */ 2273 temp_constraints++; 2274 total_counts++; 2275 if (!idxs_copied) { 2276 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2277 idxs_copied = PETSC_TRUE; 2278 } 2279 } 2280 } 2281 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2282 valid_constraints = temp_constraints; 2283 if (!pcbddc->use_nnsp_true && temp_constraints) { 2284 if (temp_constraints == 1) { /* just normalize the constraint */ 2285 PetscScalar norm,*ptr_to_data; 2286 2287 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2288 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2289 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2290 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2291 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2292 } else { /* perform SVD */ 2293 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2294 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2295 2296 #if defined(PETSC_MISSING_LAPACK_GESVD) 2297 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2298 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2299 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2300 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2301 from that computed using LAPACKgesvd 2302 -> This is due to a different computation of eigenvectors in LAPACKheev 2303 -> The quality of the POD-computed basis will be the same */ 2304 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2305 /* Store upper triangular part of correlation matrix */ 2306 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2307 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2308 for (j=0;j<temp_constraints;j++) { 2309 for (k=0;k<j+1;k++) { 2310 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2311 } 2312 } 2313 /* compute eigenvalues and eigenvectors of correlation matrix */ 2314 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2315 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2316 #if !defined(PETSC_USE_COMPLEX) 2317 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2318 #else 2319 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2320 #endif 2321 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2322 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2323 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2324 j = 0; 2325 while (j < temp_constraints && singular_vals[j] < tol) j++; 2326 total_counts = total_counts-j; 2327 valid_constraints = temp_constraints-j; 2328 /* scale and copy POD basis into used quadrature memory */ 2329 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2330 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2331 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2332 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2333 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2334 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2335 if (j<temp_constraints) { 2336 PetscInt ii; 2337 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2338 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2339 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2340 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2341 for (k=0;k<temp_constraints-j;k++) { 2342 for (ii=0;ii<size_of_constraint;ii++) { 2343 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2344 } 2345 } 2346 } 2347 #else /* on missing GESVD */ 2348 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2349 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2350 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2351 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2352 #if !defined(PETSC_USE_COMPLEX) 2353 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2354 #else 2355 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2356 #endif 2357 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2358 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2359 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2360 k = temp_constraints; 2361 if (k > size_of_constraint) k = size_of_constraint; 2362 j = 0; 2363 while (j < k && singular_vals[k-j-1] < tol) j++; 2364 valid_constraints = k-j; 2365 total_counts = total_counts-temp_constraints+valid_constraints; 2366 #endif /* on missing GESVD */ 2367 } 2368 } 2369 /* update pointers information */ 2370 if (valid_constraints) { 2371 constraints_n[total_counts_cc] = valid_constraints; 2372 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2373 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2374 /* set change_of_basis flag */ 2375 if (boolforchange) { 2376 PetscBTSet(change_basis,total_counts_cc); 2377 } 2378 total_counts_cc++; 2379 } 2380 } 2381 /* free workspace */ 2382 if (!skip_lapack) { 2383 ierr = PetscFree(work);CHKERRQ(ierr); 2384 #if defined(PETSC_USE_COMPLEX) 2385 ierr = PetscFree(rwork);CHKERRQ(ierr); 2386 #endif 2387 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2388 #if defined(PETSC_MISSING_LAPACK_GESVD) 2389 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2390 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2391 #endif 2392 } 2393 for (k=0;k<nnsp_size;k++) { 2394 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2395 } 2396 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2397 /* free index sets of faces, edges and vertices */ 2398 for (i=0;i<n_ISForFaces;i++) { 2399 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2400 } 2401 if (n_ISForFaces) { 2402 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2403 } 2404 for (i=0;i<n_ISForEdges;i++) { 2405 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2406 } 2407 if (n_ISForEdges) { 2408 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2409 } 2410 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2411 } else { 2412 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2413 2414 total_counts = 0; 2415 n_vertices = 0; 2416 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2417 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2418 } 2419 max_constraints = 0; 2420 total_counts_cc = 0; 2421 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2422 total_counts += pcbddc->adaptive_constraints_n[i]; 2423 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2424 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2425 } 2426 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2427 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2428 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2429 constraints_data = pcbddc->adaptive_constraints_data; 2430 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2431 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2432 total_counts_cc = 0; 2433 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2434 if (pcbddc->adaptive_constraints_n[i]) { 2435 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2436 } 2437 } 2438 #if 0 2439 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2440 for (i=0;i<total_counts_cc;i++) { 2441 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2442 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2443 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2444 printf(" %d",constraints_idxs[j]); 2445 } 2446 printf("\n"); 2447 printf("number of cc: %d\n",constraints_n[i]); 2448 } 2449 for (i=0;i<n_vertices;i++) { 2450 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2451 } 2452 for (i=0;i<sub_schurs->n_subs;i++) { 2453 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2454 } 2455 #endif 2456 2457 max_size_of_constraint = 0; 2458 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2459 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2460 /* Change of basis */ 2461 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2462 if (pcbddc->use_change_of_basis) { 2463 for (i=0;i<sub_schurs->n_subs;i++) { 2464 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2465 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2466 } 2467 } 2468 } 2469 } 2470 pcbddc->local_primal_size = total_counts; 2471 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2472 2473 /* map constraints_idxs in boundary numbering */ 2474 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2475 if (i != constraints_idxs_ptr[total_counts_cc]) { 2476 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2477 } 2478 2479 /* Create constraint matrix */ 2480 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2481 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2482 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2483 2484 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2485 /* determine if a QR strategy is needed for change of basis */ 2486 qr_needed = PETSC_FALSE; 2487 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2488 total_primal_vertices=0; 2489 pcbddc->local_primal_size_cc = 0; 2490 for (i=0;i<total_counts_cc;i++) { 2491 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2492 if (size_of_constraint == 1) { 2493 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2494 pcbddc->local_primal_size_cc += 1; 2495 } else if (PetscBTLookup(change_basis,i)) { 2496 for (k=0;k<constraints_n[i];k++) { 2497 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2498 } 2499 pcbddc->local_primal_size_cc += constraints_n[i]; 2500 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2501 PetscBTSet(qr_needed_idx,i); 2502 qr_needed = PETSC_TRUE; 2503 } 2504 } else { 2505 pcbddc->local_primal_size_cc += 1; 2506 } 2507 } 2508 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2509 pcbddc->n_vertices = total_primal_vertices; 2510 /* permute indices in order to have a sorted set of vertices */ 2511 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2512 2513 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2514 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2515 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2516 2517 /* nonzero structure of constraint matrix */ 2518 /* and get reference dof for local constraints */ 2519 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2520 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2521 2522 j = total_primal_vertices; 2523 total_counts = total_primal_vertices; 2524 cum = total_primal_vertices; 2525 for (i=n_vertices;i<total_counts_cc;i++) { 2526 if (!PetscBTLookup(change_basis,i)) { 2527 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2528 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2529 cum++; 2530 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2531 for (k=0;k<constraints_n[i];k++) { 2532 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2533 nnz[j+k] = size_of_constraint; 2534 } 2535 j += constraints_n[i]; 2536 } 2537 } 2538 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2539 ierr = PetscFree(nnz);CHKERRQ(ierr); 2540 2541 /* set values in constraint matrix */ 2542 for (i=0;i<total_primal_vertices;i++) { 2543 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2544 } 2545 total_counts = total_primal_vertices; 2546 for (i=n_vertices;i<total_counts_cc;i++) { 2547 if (!PetscBTLookup(change_basis,i)) { 2548 PetscInt *cols; 2549 2550 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2551 cols = constraints_idxs+constraints_idxs_ptr[i]; 2552 for (k=0;k<constraints_n[i];k++) { 2553 PetscInt row = total_counts+k; 2554 PetscScalar *vals; 2555 2556 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2557 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2558 } 2559 total_counts += constraints_n[i]; 2560 } 2561 } 2562 /* assembling */ 2563 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2564 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2565 2566 /* 2567 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2568 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2569 */ 2570 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2571 if (pcbddc->use_change_of_basis) { 2572 /* dual and primal dofs on a single cc */ 2573 PetscInt dual_dofs,primal_dofs; 2574 /* working stuff for GEQRF */ 2575 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2576 PetscBLASInt lqr_work; 2577 /* working stuff for UNGQR */ 2578 PetscScalar *gqr_work,lgqr_work_t; 2579 PetscBLASInt lgqr_work; 2580 /* working stuff for TRTRS */ 2581 PetscScalar *trs_rhs; 2582 PetscBLASInt Blas_NRHS; 2583 /* pointers for values insertion into change of basis matrix */ 2584 PetscInt *start_rows,*start_cols; 2585 PetscScalar *start_vals; 2586 /* working stuff for values insertion */ 2587 PetscBT is_primal; 2588 PetscInt *aux_primal_numbering_B; 2589 /* matrix sizes */ 2590 PetscInt global_size,local_size; 2591 /* temporary change of basis */ 2592 Mat localChangeOfBasisMatrix; 2593 /* extra space for debugging */ 2594 PetscScalar *dbg_work; 2595 2596 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2597 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2598 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2599 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2600 /* nonzeros for local mat */ 2601 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2602 for (i=0;i<pcis->n;i++) nnz[i]=1; 2603 for (i=n_vertices;i<total_counts_cc;i++) { 2604 if (PetscBTLookup(change_basis,i)) { 2605 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2606 if (PetscBTLookup(qr_needed_idx,i)) { 2607 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2608 } else { 2609 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2610 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2611 } 2612 } 2613 } 2614 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2615 ierr = PetscFree(nnz);CHKERRQ(ierr); 2616 /* Set initial identity in the matrix */ 2617 for (i=0;i<pcis->n;i++) { 2618 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2619 } 2620 2621 if (pcbddc->dbg_flag) { 2622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2623 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2624 } 2625 2626 2627 /* Now we loop on the constraints which need a change of basis */ 2628 /* 2629 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2630 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2631 2632 Basic blocks of change of basis matrix T computed by 2633 2634 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2635 2636 | 1 0 ... 0 s_1/S | 2637 | 0 1 ... 0 s_2/S | 2638 | ... | 2639 | 0 ... 1 s_{n-1}/S | 2640 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2641 2642 with S = \sum_{i=1}^n s_i^2 2643 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2644 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2645 2646 - QR decomposition of constraints otherwise 2647 */ 2648 if (qr_needed) { 2649 /* space to store Q */ 2650 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2651 /* first we issue queries for optimal work */ 2652 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2653 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2654 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2655 lqr_work = -1; 2656 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2657 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2658 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2659 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2660 lgqr_work = -1; 2661 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2662 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2663 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2664 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2665 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2666 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2667 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2668 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2669 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2670 /* array to store scaling factors for reflectors */ 2671 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2672 /* array to store rhs and solution of triangular solver */ 2673 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2674 /* allocating workspace for check */ 2675 if (pcbddc->dbg_flag) { 2676 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2677 } 2678 } 2679 /* array to store whether a node is primal or not */ 2680 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2681 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2682 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2683 if (i != total_primal_vertices) { 2684 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2685 } 2686 for (i=0;i<total_primal_vertices;i++) { 2687 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2688 } 2689 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2690 2691 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2692 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2693 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2694 if (PetscBTLookup(change_basis,total_counts)) { 2695 /* get constraint info */ 2696 primal_dofs = constraints_n[total_counts]; 2697 dual_dofs = size_of_constraint-primal_dofs; 2698 2699 if (pcbddc->dbg_flag) { 2700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2701 } 2702 2703 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2704 2705 /* copy quadrature constraints for change of basis check */ 2706 if (pcbddc->dbg_flag) { 2707 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2708 } 2709 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2710 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2711 2712 /* compute QR decomposition of constraints */ 2713 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2714 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2715 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2716 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2717 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2718 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2719 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2720 2721 /* explictly compute R^-T */ 2722 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2723 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2724 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2725 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2726 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2727 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2728 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2729 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2730 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2731 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2732 2733 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2734 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2735 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2736 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2737 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2738 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2739 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2740 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2741 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2742 2743 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2744 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2745 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2746 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2747 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2748 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2749 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2750 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2751 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2752 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2753 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2754 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2755 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2756 2757 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2758 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2759 /* insert cols for primal dofs */ 2760 for (j=0;j<primal_dofs;j++) { 2761 start_vals = &qr_basis[j*size_of_constraint]; 2762 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2763 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2764 } 2765 /* insert cols for dual dofs */ 2766 for (j=0,k=0;j<dual_dofs;k++) { 2767 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2768 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2769 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2770 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2771 j++; 2772 } 2773 } 2774 2775 /* check change of basis */ 2776 if (pcbddc->dbg_flag) { 2777 PetscInt ii,jj; 2778 PetscBool valid_qr=PETSC_TRUE; 2779 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2780 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2781 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2782 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2783 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2784 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2785 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2786 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2787 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2788 for (jj=0;jj<size_of_constraint;jj++) { 2789 for (ii=0;ii<primal_dofs;ii++) { 2790 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2791 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2792 } 2793 } 2794 if (!valid_qr) { 2795 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2796 for (jj=0;jj<size_of_constraint;jj++) { 2797 for (ii=0;ii<primal_dofs;ii++) { 2798 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2799 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2800 } 2801 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2802 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2803 } 2804 } 2805 } 2806 } else { 2807 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2808 } 2809 } 2810 } else { /* simple transformation block */ 2811 PetscInt row,col; 2812 PetscScalar val,norm; 2813 2814 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2815 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2816 for (j=0;j<size_of_constraint;j++) { 2817 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2818 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2819 if (!PetscBTLookup(is_primal,row_B)) { 2820 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2821 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2822 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2823 } else { 2824 for (k=0;k<size_of_constraint;k++) { 2825 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2826 if (row != col) { 2827 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2828 } else { 2829 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2830 } 2831 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2832 } 2833 } 2834 } 2835 if (pcbddc->dbg_flag) { 2836 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2837 } 2838 } 2839 } else { 2840 if (pcbddc->dbg_flag) { 2841 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2842 } 2843 } 2844 } 2845 2846 /* free workspace */ 2847 if (qr_needed) { 2848 if (pcbddc->dbg_flag) { 2849 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2850 } 2851 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2852 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2853 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2854 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2855 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2856 } 2857 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2858 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2859 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2860 2861 /* assembling of global change of variable */ 2862 { 2863 Mat tmat; 2864 PetscInt bs; 2865 2866 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2867 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2868 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2869 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2870 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2871 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2872 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2873 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2874 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2875 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2876 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2877 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2878 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2879 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2880 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2881 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2882 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2883 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2884 } 2885 /* check */ 2886 if (pcbddc->dbg_flag) { 2887 PetscReal error; 2888 Vec x,x_change; 2889 2890 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2891 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2892 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2893 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2894 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2895 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2896 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2897 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2898 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2899 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2900 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2901 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2902 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2903 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2904 ierr = VecDestroy(&x);CHKERRQ(ierr); 2905 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2906 } 2907 2908 /* adapt sub_schurs computed (if any) */ 2909 if (pcbddc->use_deluxe_scaling) { 2910 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2911 if (sub_schurs->S_Ej_all) { 2912 Mat S_new,tmat; 2913 IS is_all_N; 2914 2915 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2916 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2917 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2918 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2919 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2920 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2921 sub_schurs->S_Ej_all = S_new; 2922 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2923 if (sub_schurs->sum_S_Ej_all) { 2924 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2925 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2926 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2927 sub_schurs->sum_S_Ej_all = S_new; 2928 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2929 } 2930 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2931 } 2932 } 2933 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2934 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2935 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2936 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2937 } 2938 2939 /* set up change of basis context */ 2940 if (pcbddc->ChangeOfBasisMatrix) { 2941 PCBDDCChange_ctx change_ctx; 2942 2943 if (!pcbddc->new_global_mat) { 2944 PetscInt global_size,local_size; 2945 2946 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2947 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2948 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2949 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2950 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2951 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2952 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2953 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2954 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2955 } else { 2956 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2957 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2958 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2959 } 2960 if (!pcbddc->user_ChangeOfBasisMatrix) { 2961 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2962 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2963 } else { 2964 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2965 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2966 } 2967 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2968 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2969 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2970 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2971 } 2972 2973 /* check if a new primal space has been introduced */ 2974 pcbddc->new_primal_space_local = PETSC_TRUE; 2975 if (olocal_primal_size == pcbddc->local_primal_size) { 2976 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2977 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2978 if (!pcbddc->new_primal_space_local) { 2979 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2980 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2981 } 2982 } 2983 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2984 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2985 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2986 2987 /* flush dbg viewer */ 2988 if (pcbddc->dbg_flag) { 2989 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2990 } 2991 2992 /* free workspace */ 2993 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2994 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2995 if (!pcbddc->adaptive_selection) { 2996 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2997 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2998 } else { 2999 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3000 pcbddc->adaptive_constraints_idxs_ptr, 3001 pcbddc->adaptive_constraints_data_ptr, 3002 pcbddc->adaptive_constraints_idxs, 3003 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3004 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3005 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3006 } 3007 PetscFunctionReturn(0); 3008 } 3009 3010 #undef __FUNCT__ 3011 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3012 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3013 { 3014 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3015 PC_IS *pcis = (PC_IS*)pc->data; 3016 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3017 PetscInt ierr,i,vertex_size,N; 3018 PetscViewer viewer=pcbddc->dbg_viewer; 3019 3020 PetscFunctionBegin; 3021 /* Reset previously computed graph */ 3022 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3023 /* Init local Graph struct */ 3024 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3025 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3026 3027 /* Check validity of the csr graph passed in by the user */ 3028 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3029 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3030 } 3031 3032 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3033 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3034 PetscInt *xadj,*adjncy; 3035 PetscInt nvtxs; 3036 PetscBool flg_row=PETSC_FALSE; 3037 3038 if (pcbddc->use_local_adj) { 3039 3040 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3041 if (flg_row) { 3042 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3043 pcbddc->computed_rowadj = PETSC_TRUE; 3044 } 3045 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3046 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3047 IS is_dummy; 3048 ISLocalToGlobalMapping l2gmap_dummy; 3049 PetscInt j,sum; 3050 PetscInt *cxadj,*cadjncy; 3051 const PetscInt *idxs; 3052 PCBDDCGraph graph; 3053 PetscBT is_on_boundary; 3054 3055 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3056 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3057 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3058 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3059 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3060 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3061 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3062 if (flg_row) { 3063 graph->xadj = xadj; 3064 graph->adjncy = adjncy; 3065 } 3066 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3067 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3068 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3069 3070 if (pcbddc->dbg_flag) { 3071 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3072 for (i=0;i<graph->ncc;i++) { 3073 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3074 } 3075 } 3076 3077 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3078 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3079 for (i=0;i<pcis->n_B;i++) { 3080 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3081 } 3082 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3083 3084 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3085 sum = 0; 3086 for (i=0;i<graph->ncc;i++) { 3087 PetscInt sizecc = 0; 3088 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3089 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3090 sizecc++; 3091 } 3092 } 3093 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3094 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3095 cxadj[graph->queue[j]] = sizecc; 3096 } 3097 } 3098 sum += sizecc*sizecc; 3099 } 3100 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3101 sum = 0; 3102 for (i=0;i<pcis->n;i++) { 3103 PetscInt temp = cxadj[i]; 3104 cxadj[i] = sum; 3105 sum += temp; 3106 } 3107 cxadj[pcis->n] = sum; 3108 for (i=0;i<graph->ncc;i++) { 3109 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3110 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3111 PetscInt k,sizecc = 0; 3112 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3113 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3114 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3115 sizecc++; 3116 } 3117 } 3118 } 3119 } 3120 } 3121 if (sum) { 3122 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3123 } else { 3124 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3125 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3126 } 3127 graph->xadj = 0; 3128 graph->adjncy = 0; 3129 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3130 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3131 } 3132 } 3133 if (pcbddc->dbg_flag) { 3134 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3135 } 3136 3137 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3138 vertex_size = 1; 3139 if (pcbddc->user_provided_isfordofs) { 3140 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3141 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3142 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3143 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3144 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3145 } 3146 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3147 pcbddc->n_ISForDofs = 0; 3148 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3149 } 3150 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3151 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3152 } else { 3153 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3154 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3155 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3156 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3157 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3158 } 3159 } 3160 } 3161 3162 /* Setup of Graph */ 3163 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3164 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3165 } 3166 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3167 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3168 } 3169 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3170 3171 /* Graph's connected components analysis */ 3172 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3173 3174 /* print some info to stdout */ 3175 if (pcbddc->dbg_flag) { 3176 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3177 } 3178 3179 /* mark topography has done */ 3180 pcbddc->recompute_topography = PETSC_FALSE; 3181 PetscFunctionReturn(0); 3182 } 3183 3184 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3185 #undef __FUNCT__ 3186 #define __FUNCT__ "PCBDDCSubsetNumbering" 3187 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3188 { 3189 PetscSF sf; 3190 PetscLayout map; 3191 const PetscInt *idxs; 3192 PetscInt *leaf_data,*root_data,*gidxs; 3193 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3194 PetscInt n_n,nlocals,start,first_index; 3195 PetscMPIInt commsize; 3196 PetscBool first_found; 3197 PetscErrorCode ierr; 3198 3199 PetscFunctionBegin; 3200 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3201 if (subset_mult) { 3202 PetscCheckSameComm(subset,1,subset_mult,2); 3203 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3204 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3205 } 3206 /* create workspace layout for computing global indices of subset */ 3207 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3208 lbounds[0] = lbounds[1] = 0; 3209 for (i=0;i<n;i++) { 3210 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3211 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3212 } 3213 lbounds[0] = -lbounds[0]; 3214 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3215 gbounds[0] = -gbounds[0]; 3216 N = gbounds[1] - gbounds[0] + 1; 3217 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3218 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3219 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3220 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3221 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3222 3223 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3224 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3225 if (subset_mult) { 3226 const PetscInt* idxs_mult; 3227 3228 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3229 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3230 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3231 } else { 3232 for (i=0;i<n;i++) leaf_data[i] = 1; 3233 } 3234 /* local size of new subset */ 3235 n_n = 0; 3236 for (i=0;i<n;i++) n_n += leaf_data[i]; 3237 3238 /* global indexes in layout */ 3239 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3240 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3241 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3242 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3243 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3244 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3245 3246 /* reduce from leaves to roots */ 3247 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3248 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3249 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3250 3251 /* count indexes in local part of layout */ 3252 nlocals = 0; 3253 first_index = -1; 3254 first_found = PETSC_FALSE; 3255 for (i=0;i<Nl;i++) { 3256 if (!first_found && root_data[i]) { 3257 first_found = PETSC_TRUE; 3258 first_index = i; 3259 } 3260 nlocals += root_data[i]; 3261 } 3262 3263 /* cumulative of number of indexes and size of subset without holes */ 3264 #if defined(PETSC_HAVE_MPI_EXSCAN) 3265 start = 0; 3266 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3267 #else 3268 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3269 start = start-nlocals; 3270 #endif 3271 3272 if (N_n) { /* compute total size of new subset if requested */ 3273 *N_n = start + nlocals; 3274 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3275 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3276 } 3277 3278 /* adapt root data with cumulative */ 3279 if (first_found) { 3280 PetscInt old_index; 3281 3282 root_data[first_index] += start; 3283 old_index = first_index; 3284 for (i=first_index+1;i<Nl;i++) { 3285 if (root_data[i]) { 3286 root_data[i] += root_data[old_index]; 3287 old_index = i; 3288 } 3289 } 3290 } 3291 3292 /* from roots to leaves */ 3293 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3294 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3295 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3296 3297 /* create new IS with global indexes without holes */ 3298 if (subset_mult) { 3299 const PetscInt* idxs_mult; 3300 PetscInt cum; 3301 3302 cum = 0; 3303 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3304 for (i=0;i<n;i++) { 3305 PetscInt j; 3306 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3307 } 3308 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3309 } else { 3310 for (i=0;i<n;i++) { 3311 gidxs[i] = leaf_data[i]-1; 3312 } 3313 } 3314 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3315 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3316 PetscFunctionReturn(0); 3317 } 3318 3319 #undef __FUNCT__ 3320 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3321 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3322 { 3323 PetscInt i,j; 3324 PetscScalar *alphas; 3325 PetscErrorCode ierr; 3326 3327 PetscFunctionBegin; 3328 /* this implements stabilized Gram-Schmidt */ 3329 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3330 for (i=0;i<n;i++) { 3331 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3332 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3333 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3334 } 3335 ierr = PetscFree(alphas);CHKERRQ(ierr); 3336 PetscFunctionReturn(0); 3337 } 3338 3339 #undef __FUNCT__ 3340 #define __FUNCT__ "MatISGetSubassemblingPattern" 3341 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3342 { 3343 IS ranks_send_to; 3344 PetscInt n_neighs,*neighs,*n_shared,**shared; 3345 PetscMPIInt size,rank,color; 3346 PetscInt *xadj,*adjncy; 3347 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3348 PetscInt i,local_size,threshold=0; 3349 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3350 PetscSubcomm subcomm; 3351 PetscErrorCode ierr; 3352 3353 PetscFunctionBegin; 3354 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3355 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3356 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3357 3358 /* Get info on mapping */ 3359 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3360 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3361 3362 /* build local CSR graph of subdomains' connectivity */ 3363 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3364 xadj[0] = 0; 3365 xadj[1] = PetscMax(n_neighs-1,0); 3366 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3367 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3368 3369 if (threshold) { 3370 PetscInt xadj_count = 0; 3371 for (i=1;i<n_neighs;i++) { 3372 if (n_shared[i] > threshold) { 3373 adjncy[xadj_count] = neighs[i]; 3374 adjncy_wgt[xadj_count] = n_shared[i]; 3375 xadj_count++; 3376 } 3377 } 3378 xadj[1] = xadj_count; 3379 } else { 3380 if (xadj[1]) { 3381 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3382 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3383 } 3384 } 3385 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3386 if (use_square) { 3387 for (i=0;i<xadj[1];i++) { 3388 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3389 } 3390 } 3391 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3392 3393 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3394 3395 /* 3396 Restrict work on active processes only. 3397 */ 3398 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3399 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3400 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3401 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3402 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3403 if (color) { 3404 ierr = PetscFree(xadj);CHKERRQ(ierr); 3405 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3406 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3407 } else { 3408 Mat subdomain_adj; 3409 IS new_ranks,new_ranks_contig; 3410 MatPartitioning partitioner; 3411 PetscInt prank,rstart=0,rend=0; 3412 PetscInt *is_indices,*oldranks; 3413 PetscBool aggregate; 3414 3415 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3416 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3417 prank = rank; 3418 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3419 /* 3420 for (i=0;i<size;i++) { 3421 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3422 } 3423 */ 3424 for (i=0;i<xadj[1];i++) { 3425 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3426 } 3427 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3428 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3429 if (aggregate) { 3430 PetscInt lrows,row,ncols,*cols; 3431 PetscMPIInt nrank; 3432 PetscScalar *vals; 3433 3434 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3435 lrows = 0; 3436 if (nrank<redprocs) { 3437 lrows = size/redprocs; 3438 if (nrank<size%redprocs) lrows++; 3439 } 3440 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3441 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3442 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3443 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3444 row = nrank; 3445 ncols = xadj[1]-xadj[0]; 3446 cols = adjncy; 3447 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3448 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3449 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3450 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3451 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3452 ierr = PetscFree(xadj);CHKERRQ(ierr); 3453 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3454 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3455 ierr = PetscFree(vals);CHKERRQ(ierr); 3456 } else { 3457 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3458 } 3459 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3460 3461 /* Partition */ 3462 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3463 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3464 if (use_vwgt) { 3465 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3466 v_wgt[0] = local_size; 3467 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3468 } 3469 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3470 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3471 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3472 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3473 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3474 3475 /* renumber new_ranks to avoid "holes" in new set of processors */ 3476 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3477 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3478 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3479 if (!redprocs) { 3480 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3481 } else { 3482 PetscInt idxs[1]; 3483 PetscMPIInt tag; 3484 MPI_Request *reqs; 3485 3486 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3487 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3488 for (i=rstart;i<rend;i++) { 3489 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3490 } 3491 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3492 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3493 ierr = PetscFree(reqs);CHKERRQ(ierr); 3494 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3495 } 3496 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3497 /* clean up */ 3498 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3499 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3500 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3501 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3502 } 3503 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3504 3505 /* assemble parallel IS for sends */ 3506 i = 1; 3507 if (color) i=0; 3508 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3509 /* get back IS */ 3510 *is_sends = ranks_send_to; 3511 PetscFunctionReturn(0); 3512 } 3513 3514 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3515 3516 #undef __FUNCT__ 3517 #define __FUNCT__ "MatISSubassemble" 3518 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3519 { 3520 Mat local_mat; 3521 IS is_sends_internal; 3522 PetscInt rows,cols,new_local_rows; 3523 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3524 PetscBool ismatis,isdense,newisdense,destroy_mat; 3525 ISLocalToGlobalMapping l2gmap; 3526 PetscInt* l2gmap_indices; 3527 const PetscInt* is_indices; 3528 MatType new_local_type; 3529 /* buffers */ 3530 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3531 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3532 PetscInt *recv_buffer_idxs_local; 3533 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3534 /* MPI */ 3535 MPI_Comm comm,comm_n; 3536 PetscSubcomm subcomm; 3537 PetscMPIInt n_sends,n_recvs,commsize; 3538 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3539 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3540 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3541 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3542 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3543 PetscErrorCode ierr; 3544 3545 PetscFunctionBegin; 3546 /* TODO: add missing checks */ 3547 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3548 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3549 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3550 PetscValidLogicalCollectiveInt(mat,nis,7); 3551 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3552 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3553 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3554 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3555 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3556 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3557 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3558 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3559 PetscInt mrows,mcols,mnrows,mncols; 3560 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3561 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3562 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3563 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3564 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3565 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3566 } 3567 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3568 PetscValidLogicalCollectiveInt(mat,bs,0); 3569 /* prepare IS for sending if not provided */ 3570 if (!is_sends) { 3571 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3572 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3573 } else { 3574 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3575 is_sends_internal = is_sends; 3576 } 3577 3578 /* get comm */ 3579 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3580 3581 /* compute number of sends */ 3582 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3583 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3584 3585 /* compute number of receives */ 3586 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3587 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3588 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3589 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3590 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3591 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3592 ierr = PetscFree(iflags);CHKERRQ(ierr); 3593 3594 /* restrict comm if requested */ 3595 subcomm = 0; 3596 destroy_mat = PETSC_FALSE; 3597 if (restrict_comm) { 3598 PetscMPIInt color,subcommsize; 3599 3600 color = 0; 3601 if (restrict_full) { 3602 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3603 } else { 3604 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3605 } 3606 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3607 subcommsize = commsize - subcommsize; 3608 /* check if reuse has been requested */ 3609 if (reuse == MAT_REUSE_MATRIX) { 3610 if (*mat_n) { 3611 PetscMPIInt subcommsize2; 3612 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3613 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3614 comm_n = PetscObjectComm((PetscObject)*mat_n); 3615 } else { 3616 comm_n = PETSC_COMM_SELF; 3617 } 3618 } else { /* MAT_INITIAL_MATRIX */ 3619 PetscMPIInt rank; 3620 3621 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3622 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3623 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3624 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3625 comm_n = PetscSubcommChild(subcomm); 3626 } 3627 /* flag to destroy *mat_n if not significative */ 3628 if (color) destroy_mat = PETSC_TRUE; 3629 } else { 3630 comm_n = comm; 3631 } 3632 3633 /* prepare send/receive buffers */ 3634 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3635 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3636 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3637 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3638 if (nis) { 3639 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3640 } 3641 3642 /* Get data from local matrices */ 3643 if (!isdense) { 3644 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3645 /* TODO: See below some guidelines on how to prepare the local buffers */ 3646 /* 3647 send_buffer_vals should contain the raw values of the local matrix 3648 send_buffer_idxs should contain: 3649 - MatType_PRIVATE type 3650 - PetscInt size_of_l2gmap 3651 - PetscInt global_row_indices[size_of_l2gmap] 3652 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3653 */ 3654 } else { 3655 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3656 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3657 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3658 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3659 send_buffer_idxs[1] = i; 3660 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3661 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3662 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3663 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3664 for (i=0;i<n_sends;i++) { 3665 ilengths_vals[is_indices[i]] = len*len; 3666 ilengths_idxs[is_indices[i]] = len+2; 3667 } 3668 } 3669 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3670 /* additional is (if any) */ 3671 if (nis) { 3672 PetscMPIInt psum; 3673 PetscInt j; 3674 for (j=0,psum=0;j<nis;j++) { 3675 PetscInt plen; 3676 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3677 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3678 psum += len+1; /* indices + lenght */ 3679 } 3680 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3681 for (j=0,psum=0;j<nis;j++) { 3682 PetscInt plen; 3683 const PetscInt *is_array_idxs; 3684 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3685 send_buffer_idxs_is[psum] = plen; 3686 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3687 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3688 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3689 psum += plen+1; /* indices + lenght */ 3690 } 3691 for (i=0;i<n_sends;i++) { 3692 ilengths_idxs_is[is_indices[i]] = psum; 3693 } 3694 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3695 } 3696 3697 buf_size_idxs = 0; 3698 buf_size_vals = 0; 3699 buf_size_idxs_is = 0; 3700 for (i=0;i<n_recvs;i++) { 3701 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3702 buf_size_vals += (PetscInt)olengths_vals[i]; 3703 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3704 } 3705 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3706 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3707 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3708 3709 /* get new tags for clean communications */ 3710 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3711 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3712 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3713 3714 /* allocate for requests */ 3715 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3716 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3717 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3718 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3719 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3720 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3721 3722 /* communications */ 3723 ptr_idxs = recv_buffer_idxs; 3724 ptr_vals = recv_buffer_vals; 3725 ptr_idxs_is = recv_buffer_idxs_is; 3726 for (i=0;i<n_recvs;i++) { 3727 source_dest = onodes[i]; 3728 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3729 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3730 ptr_idxs += olengths_idxs[i]; 3731 ptr_vals += olengths_vals[i]; 3732 if (nis) { 3733 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3734 ptr_idxs_is += olengths_idxs_is[i]; 3735 } 3736 } 3737 for (i=0;i<n_sends;i++) { 3738 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3739 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3740 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3741 if (nis) { 3742 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3743 } 3744 } 3745 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3746 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3747 3748 /* assemble new l2g map */ 3749 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3750 ptr_idxs = recv_buffer_idxs; 3751 new_local_rows = 0; 3752 for (i=0;i<n_recvs;i++) { 3753 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3754 ptr_idxs += olengths_idxs[i]; 3755 } 3756 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3757 ptr_idxs = recv_buffer_idxs; 3758 new_local_rows = 0; 3759 for (i=0;i<n_recvs;i++) { 3760 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3761 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3762 ptr_idxs += olengths_idxs[i]; 3763 } 3764 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3765 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3766 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3767 3768 /* infer new local matrix type from received local matrices type */ 3769 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3770 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3771 if (n_recvs) { 3772 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3773 ptr_idxs = recv_buffer_idxs; 3774 for (i=0;i<n_recvs;i++) { 3775 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3776 new_local_type_private = MATAIJ_PRIVATE; 3777 break; 3778 } 3779 ptr_idxs += olengths_idxs[i]; 3780 } 3781 switch (new_local_type_private) { 3782 case MATDENSE_PRIVATE: 3783 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3784 new_local_type = MATSEQAIJ; 3785 bs = 1; 3786 } else { /* if I receive only 1 dense matrix */ 3787 new_local_type = MATSEQDENSE; 3788 bs = 1; 3789 } 3790 break; 3791 case MATAIJ_PRIVATE: 3792 new_local_type = MATSEQAIJ; 3793 bs = 1; 3794 break; 3795 case MATBAIJ_PRIVATE: 3796 new_local_type = MATSEQBAIJ; 3797 break; 3798 case MATSBAIJ_PRIVATE: 3799 new_local_type = MATSEQSBAIJ; 3800 break; 3801 default: 3802 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3803 break; 3804 } 3805 } else { /* by default, new_local_type is seqdense */ 3806 new_local_type = MATSEQDENSE; 3807 bs = 1; 3808 } 3809 3810 /* create MATIS object if needed */ 3811 if (reuse == MAT_INITIAL_MATRIX) { 3812 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3813 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 3814 } else { 3815 /* it also destroys the local matrices */ 3816 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3817 } 3818 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3819 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3820 3821 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3822 3823 /* Global to local map of received indices */ 3824 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3825 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3826 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3827 3828 /* restore attributes -> type of incoming data and its size */ 3829 buf_size_idxs = 0; 3830 for (i=0;i<n_recvs;i++) { 3831 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3832 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3833 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3834 } 3835 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3836 3837 /* set preallocation */ 3838 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3839 if (!newisdense) { 3840 PetscInt *new_local_nnz=0; 3841 3842 ptr_vals = recv_buffer_vals; 3843 ptr_idxs = recv_buffer_idxs_local; 3844 if (n_recvs) { 3845 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3846 } 3847 for (i=0;i<n_recvs;i++) { 3848 PetscInt j; 3849 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3850 for (j=0;j<*(ptr_idxs+1);j++) { 3851 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3852 } 3853 } else { 3854 /* TODO */ 3855 } 3856 ptr_idxs += olengths_idxs[i]; 3857 } 3858 if (new_local_nnz) { 3859 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3860 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3861 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3862 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3863 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3864 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3865 } else { 3866 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3867 } 3868 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3869 } else { 3870 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3871 } 3872 3873 /* set values */ 3874 ptr_vals = recv_buffer_vals; 3875 ptr_idxs = recv_buffer_idxs_local; 3876 for (i=0;i<n_recvs;i++) { 3877 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3878 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3879 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3880 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3881 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3882 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3883 } else { 3884 /* TODO */ 3885 } 3886 ptr_idxs += olengths_idxs[i]; 3887 ptr_vals += olengths_vals[i]; 3888 } 3889 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3890 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3891 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3892 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3893 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3894 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3895 3896 #if 0 3897 if (!restrict_comm) { /* check */ 3898 Vec lvec,rvec; 3899 PetscReal infty_error; 3900 3901 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3902 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3903 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3904 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3905 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3906 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3907 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3908 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3909 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3910 } 3911 #endif 3912 3913 /* assemble new additional is (if any) */ 3914 if (nis) { 3915 PetscInt **temp_idxs,*count_is,j,psum; 3916 3917 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3918 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3919 ptr_idxs = recv_buffer_idxs_is; 3920 psum = 0; 3921 for (i=0;i<n_recvs;i++) { 3922 for (j=0;j<nis;j++) { 3923 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3924 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3925 psum += plen; 3926 ptr_idxs += plen+1; /* shift pointer to received data */ 3927 } 3928 } 3929 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3930 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3931 for (i=1;i<nis;i++) { 3932 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3933 } 3934 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3935 ptr_idxs = recv_buffer_idxs_is; 3936 for (i=0;i<n_recvs;i++) { 3937 for (j=0;j<nis;j++) { 3938 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3939 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3940 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3941 ptr_idxs += plen+1; /* shift pointer to received data */ 3942 } 3943 } 3944 for (i=0;i<nis;i++) { 3945 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3946 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3947 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3948 } 3949 ierr = PetscFree(count_is);CHKERRQ(ierr); 3950 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3951 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3952 } 3953 /* free workspace */ 3954 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3955 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3956 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3957 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3958 if (isdense) { 3959 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3960 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3961 } else { 3962 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3963 } 3964 if (nis) { 3965 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3966 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3967 } 3968 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3969 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3970 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3971 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3972 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3973 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3974 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3975 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3976 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3977 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3978 ierr = PetscFree(onodes);CHKERRQ(ierr); 3979 if (nis) { 3980 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3981 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3982 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3983 } 3984 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3985 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3986 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3987 for (i=0;i<nis;i++) { 3988 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3989 } 3990 *mat_n = NULL; 3991 } 3992 PetscFunctionReturn(0); 3993 } 3994 3995 /* temporary hack into ksp private data structure */ 3996 #include <petsc/private/kspimpl.h> 3997 3998 #undef __FUNCT__ 3999 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4000 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4001 { 4002 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4003 PC_IS *pcis = (PC_IS*)pc->data; 4004 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4005 MatNullSpace CoarseNullSpace=NULL; 4006 ISLocalToGlobalMapping coarse_islg; 4007 IS coarse_is,*isarray; 4008 PetscInt i,im_active=-1,active_procs=-1; 4009 PetscInt nis,nisdofs,nisneu; 4010 PC pc_temp; 4011 PCType coarse_pc_type; 4012 KSPType coarse_ksp_type; 4013 PetscBool multilevel_requested,multilevel_allowed; 4014 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4015 Mat t_coarse_mat_is; 4016 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4017 PetscMPIInt all_procs; 4018 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4019 PetscBool compute_vecs = PETSC_FALSE; 4020 PetscScalar *array; 4021 PetscErrorCode ierr; 4022 4023 PetscFunctionBegin; 4024 /* Assign global numbering to coarse dofs */ 4025 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4026 PetscInt ocoarse_size; 4027 compute_vecs = PETSC_TRUE; 4028 ocoarse_size = pcbddc->coarse_size; 4029 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4030 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4031 /* see if we can avoid some work */ 4032 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4033 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4034 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4035 PC pc; 4036 PetscBool isbddc; 4037 4038 /* temporary workaround since PCBDDC does not have a reset method so far */ 4039 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4040 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4041 if (isbddc) { 4042 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4043 } 4044 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4045 coarse_reuse = PETSC_FALSE; 4046 } else { /* we can safely reuse already computed coarse matrix */ 4047 coarse_reuse = PETSC_TRUE; 4048 } 4049 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4050 coarse_reuse = PETSC_FALSE; 4051 } 4052 /* reset any subassembling information */ 4053 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4054 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4055 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4056 coarse_reuse = PETSC_TRUE; 4057 } 4058 4059 /* count "active" (i.e. with positive local size) and "void" processes */ 4060 im_active = !!(pcis->n); 4061 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4062 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4063 void_procs = all_procs-active_procs; 4064 csin_type_simple = PETSC_TRUE; 4065 redist = PETSC_FALSE; 4066 if (pcbddc->current_level && void_procs) { 4067 csin_ml = PETSC_TRUE; 4068 ncoarse_ml = void_procs; 4069 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4070 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4071 csin_ds = PETSC_TRUE; 4072 ncoarse_ds = pcbddc->redistribute_coarse; 4073 redist = PETSC_TRUE; 4074 } else { 4075 csin_ds = PETSC_TRUE; 4076 ncoarse_ds = active_procs; 4077 redist = PETSC_TRUE; 4078 } 4079 } else { 4080 csin_ml = PETSC_FALSE; 4081 ncoarse_ml = all_procs; 4082 if (void_procs) { 4083 csin_ds = PETSC_TRUE; 4084 ncoarse_ds = void_procs; 4085 csin_type_simple = PETSC_FALSE; 4086 } else { 4087 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4088 csin_ds = PETSC_TRUE; 4089 ncoarse_ds = pcbddc->redistribute_coarse; 4090 redist = PETSC_TRUE; 4091 } else { 4092 csin_ds = PETSC_FALSE; 4093 ncoarse_ds = all_procs; 4094 } 4095 } 4096 } 4097 4098 /* 4099 test if we can go multilevel: three conditions must be satisfied: 4100 - we have not exceeded the number of levels requested 4101 - we can actually subassemble the active processes 4102 - we can find a suitable number of MPI processes where we can place the subassembled problem 4103 */ 4104 multilevel_allowed = PETSC_FALSE; 4105 multilevel_requested = PETSC_FALSE; 4106 if (pcbddc->current_level < pcbddc->max_levels) { 4107 multilevel_requested = PETSC_TRUE; 4108 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4109 multilevel_allowed = PETSC_FALSE; 4110 } else { 4111 multilevel_allowed = PETSC_TRUE; 4112 } 4113 } 4114 /* determine number of process partecipating to coarse solver */ 4115 if (multilevel_allowed) { 4116 ncoarse = ncoarse_ml; 4117 csin = csin_ml; 4118 redist = PETSC_FALSE; 4119 } else { 4120 ncoarse = ncoarse_ds; 4121 csin = csin_ds; 4122 } 4123 4124 /* creates temporary l2gmap and IS for coarse indexes */ 4125 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4126 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4127 4128 /* creates temporary MATIS object for coarse matrix */ 4129 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4130 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4131 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4132 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4133 #if 0 4134 { 4135 PetscViewer viewer; 4136 char filename[256]; 4137 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4138 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4139 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4140 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4141 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4142 } 4143 #endif 4144 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4145 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4146 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4147 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4148 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4149 4150 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4151 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4152 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4153 const PetscInt *idxs; 4154 ISLocalToGlobalMapping tmap; 4155 4156 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4157 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4158 /* allocate space for temporary storage */ 4159 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4160 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4161 /* allocate for IS array */ 4162 nisdofs = pcbddc->n_ISForDofsLocal; 4163 nisneu = !!pcbddc->NeumannBoundariesLocal; 4164 nis = nisdofs + nisneu; 4165 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4166 /* dofs splitting */ 4167 for (i=0;i<nisdofs;i++) { 4168 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4169 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4170 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4171 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4172 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4173 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4174 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4175 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4176 } 4177 /* neumann boundaries */ 4178 if (pcbddc->NeumannBoundariesLocal) { 4179 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4180 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4181 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4182 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4183 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4184 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4185 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4186 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4187 } 4188 /* free memory */ 4189 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4190 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4191 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4192 } else { 4193 nis = 0; 4194 nisdofs = 0; 4195 nisneu = 0; 4196 isarray = NULL; 4197 } 4198 /* destroy no longer needed map */ 4199 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4200 4201 /* restrict on coarse candidates (if needed) */ 4202 coarse_mat_is = NULL; 4203 if (csin) { 4204 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4205 if (redist) { 4206 PetscMPIInt rank; 4207 PetscInt spc,n_spc_p1,dest[1],destsize; 4208 4209 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4210 spc = active_procs/ncoarse; 4211 n_spc_p1 = active_procs%ncoarse; 4212 if (im_active) { 4213 destsize = 1; 4214 if (rank > n_spc_p1*(spc+1)-1) { 4215 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4216 } else { 4217 dest[0] = rank/(spc+1); 4218 } 4219 } else { 4220 destsize = 0; 4221 } 4222 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4223 } else if (csin_type_simple) { 4224 PetscMPIInt rank; 4225 PetscInt issize,isidx; 4226 4227 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4228 if (im_active) { 4229 issize = 1; 4230 isidx = (PetscInt)rank; 4231 } else { 4232 issize = 0; 4233 isidx = -1; 4234 } 4235 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4236 } else { /* get a suitable subassembling pattern from MATIS code */ 4237 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4238 } 4239 4240 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4241 if (!redist || ncoarse <= void_procs) { 4242 PetscInt ncoarse_cand,tissize,*nisindices; 4243 PetscInt *coarse_candidates; 4244 const PetscInt* tisindices; 4245 4246 /* get coarse candidates' ranks in pc communicator */ 4247 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4248 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4249 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4250 if (!coarse_candidates[i]) { 4251 coarse_candidates[ncoarse_cand++]=i; 4252 } 4253 } 4254 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4255 4256 4257 if (pcbddc->dbg_flag) { 4258 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4259 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4260 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4261 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4262 for (i=0;i<ncoarse_cand;i++) { 4263 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4264 } 4265 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4266 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4267 } 4268 /* shift the pattern on coarse candidates */ 4269 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4270 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4271 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4272 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4273 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4274 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4275 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4276 } 4277 if (pcbddc->dbg_flag) { 4278 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4279 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4280 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4281 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4282 } 4283 } 4284 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4285 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4286 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4287 } else { /* this is the last level, so use just receiving processes in subcomm */ 4288 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4289 } 4290 } else { 4291 if (pcbddc->dbg_flag) { 4292 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4293 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4294 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4295 } 4296 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4297 coarse_mat_is = t_coarse_mat_is; 4298 } 4299 4300 /* create local to global scatters for coarse problem */ 4301 if (compute_vecs) { 4302 PetscInt lrows; 4303 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4304 if (coarse_mat_is) { 4305 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4306 } else { 4307 lrows = 0; 4308 } 4309 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4310 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4311 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4312 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4313 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4314 } 4315 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4316 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4317 4318 /* set defaults for coarse KSP and PC */ 4319 if (multilevel_allowed) { 4320 coarse_ksp_type = KSPRICHARDSON; 4321 coarse_pc_type = PCBDDC; 4322 } else { 4323 coarse_ksp_type = KSPPREONLY; 4324 coarse_pc_type = PCREDUNDANT; 4325 } 4326 4327 /* print some info if requested */ 4328 if (pcbddc->dbg_flag) { 4329 if (!multilevel_allowed) { 4330 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4331 if (multilevel_requested) { 4332 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4333 } else if (pcbddc->max_levels) { 4334 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4335 } 4336 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4337 } 4338 } 4339 4340 /* create the coarse KSP object only once with defaults */ 4341 if (coarse_mat_is) { 4342 MatReuse coarse_mat_reuse; 4343 PetscViewer dbg_viewer = NULL; 4344 if (pcbddc->dbg_flag) { 4345 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4346 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4347 } 4348 if (!pcbddc->coarse_ksp) { 4349 char prefix[256],str_level[16]; 4350 size_t len; 4351 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4352 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4353 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4354 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4355 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4356 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4357 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4358 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4359 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4360 /* prefix */ 4361 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4362 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4363 if (!pcbddc->current_level) { 4364 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4365 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4366 } else { 4367 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4368 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4369 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4370 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4371 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4372 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4373 } 4374 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4375 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4376 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4377 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4378 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4379 /* allow user customization */ 4380 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4381 } 4382 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4383 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4384 if (nisdofs) { 4385 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4386 for (i=0;i<nisdofs;i++) { 4387 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4388 } 4389 } 4390 if (nisneu) { 4391 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4392 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4393 } 4394 4395 /* get some info after set from options */ 4396 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4397 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4398 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4399 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4400 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4401 isbddc = PETSC_FALSE; 4402 } 4403 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4404 if (isredundant) { 4405 KSP inner_ksp; 4406 PC inner_pc; 4407 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4408 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4409 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4410 } 4411 4412 /* assemble coarse matrix */ 4413 if (coarse_reuse) { 4414 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4415 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4416 coarse_mat_reuse = MAT_REUSE_MATRIX; 4417 } else { 4418 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4419 } 4420 if (isbddc || isnn) { 4421 if (pcbddc->coarsening_ratio > 1) { 4422 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4423 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4424 if (pcbddc->dbg_flag) { 4425 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4426 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4427 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4428 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4429 } 4430 } 4431 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4432 } else { 4433 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4434 coarse_mat = coarse_mat_is; 4435 } 4436 } else { 4437 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4438 } 4439 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4440 4441 /* propagate symmetry info of coarse matrix */ 4442 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4443 if (pc->pmat->symmetric_set) { 4444 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4445 } 4446 if (pc->pmat->hermitian_set) { 4447 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4448 } 4449 if (pc->pmat->spd_set) { 4450 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4451 } 4452 /* set operators */ 4453 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4454 if (pcbddc->dbg_flag) { 4455 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4456 } 4457 } else { /* processes non partecipating to coarse solver (if any) */ 4458 coarse_mat = 0; 4459 } 4460 ierr = PetscFree(isarray);CHKERRQ(ierr); 4461 #if 0 4462 { 4463 PetscViewer viewer; 4464 char filename[256]; 4465 sprintf(filename,"coarse_mat.m"); 4466 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4467 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4468 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4469 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4470 } 4471 #endif 4472 4473 /* Compute coarse null space (special handling by BDDC only) */ 4474 #if 0 4475 if (pcbddc->NullSpace) { 4476 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4477 } 4478 #endif 4479 4480 if (pcbddc->coarse_ksp) { 4481 Vec crhs,csol; 4482 PetscBool ispreonly; 4483 4484 if (CoarseNullSpace) { 4485 if (isbddc) { 4486 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4487 } else { 4488 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4489 } 4490 } 4491 /* setup coarse ksp */ 4492 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4493 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4494 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4495 /* hack */ 4496 if (!csol) { 4497 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4498 } 4499 if (!crhs) { 4500 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4501 } 4502 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4503 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4504 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4505 KSP check_ksp; 4506 KSPType check_ksp_type; 4507 PC check_pc; 4508 Vec check_vec,coarse_vec; 4509 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4510 PetscInt its; 4511 PetscBool compute_eigs; 4512 PetscReal *eigs_r,*eigs_c; 4513 PetscInt neigs; 4514 const char *prefix; 4515 4516 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4517 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4518 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4519 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4520 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4521 if (ispreonly) { 4522 check_ksp_type = KSPPREONLY; 4523 compute_eigs = PETSC_FALSE; 4524 } else { 4525 check_ksp_type = KSPGMRES; 4526 compute_eigs = PETSC_TRUE; 4527 } 4528 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4529 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4530 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4531 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4532 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4533 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4534 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4535 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4536 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4537 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4538 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4539 /* create random vec */ 4540 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4541 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4542 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4543 if (CoarseNullSpace) { 4544 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4545 } 4546 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4547 /* solve coarse problem */ 4548 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4549 if (CoarseNullSpace) { 4550 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4551 } 4552 /* set eigenvalue estimation if preonly has not been requested */ 4553 if (compute_eigs) { 4554 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4555 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4556 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4557 lambda_max = eigs_r[neigs-1]; 4558 lambda_min = eigs_r[0]; 4559 if (pcbddc->use_coarse_estimates) { 4560 if (lambda_max>lambda_min) { 4561 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4562 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4563 } 4564 } 4565 } 4566 4567 /* check coarse problem residual error */ 4568 if (pcbddc->dbg_flag) { 4569 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4570 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4571 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4572 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4573 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4574 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4575 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4576 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4577 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4578 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4579 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4580 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4581 if (compute_eigs) { 4582 PetscReal lambda_max_s,lambda_min_s; 4583 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4584 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4585 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4586 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4587 for (i=0;i<neigs;i++) { 4588 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4589 } 4590 } 4591 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4592 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4593 } 4594 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4595 if (compute_eigs) { 4596 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4597 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4598 } 4599 } 4600 } 4601 /* print additional info */ 4602 if (pcbddc->dbg_flag) { 4603 /* waits until all processes reaches this point */ 4604 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4605 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4606 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4607 } 4608 4609 /* free memory */ 4610 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4611 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4612 PetscFunctionReturn(0); 4613 } 4614 4615 #undef __FUNCT__ 4616 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4617 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4618 { 4619 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4620 PC_IS* pcis = (PC_IS*)pc->data; 4621 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4622 IS subset,subset_mult,subset_n; 4623 PetscInt local_size,coarse_size=0; 4624 PetscInt *local_primal_indices=NULL; 4625 const PetscInt *t_local_primal_indices; 4626 PetscErrorCode ierr; 4627 4628 PetscFunctionBegin; 4629 /* Compute global number of coarse dofs */ 4630 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4631 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4632 } 4633 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4634 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4635 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4636 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4637 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4638 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4639 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4640 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4641 if (local_size != pcbddc->local_primal_size) { 4642 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4643 } 4644 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4645 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4646 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4647 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4648 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4649 4650 /* check numbering */ 4651 if (pcbddc->dbg_flag) { 4652 PetscScalar coarsesum,*array; 4653 PetscInt i; 4654 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4655 4656 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4657 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4658 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4659 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4660 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4661 for (i=0;i<pcbddc->local_primal_size;i++) { 4662 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4663 } 4664 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4665 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4666 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4667 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4668 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4669 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4670 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4671 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4672 for (i=0;i<pcis->n;i++) { 4673 if (array[i] == 1.0) { 4674 set_error = PETSC_TRUE; 4675 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4676 } 4677 } 4678 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4679 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4680 for (i=0;i<pcis->n;i++) { 4681 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4682 } 4683 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4684 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4685 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4686 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4687 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4688 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4689 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4690 PetscInt *gidxs; 4691 4692 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4693 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4694 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4695 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4697 for (i=0;i<pcbddc->local_primal_size;i++) { 4698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4699 } 4700 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4701 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4702 } 4703 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4704 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4705 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4706 } 4707 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4708 /* get back data */ 4709 *coarse_size_n = coarse_size; 4710 *local_primal_indices_n = local_primal_indices; 4711 PetscFunctionReturn(0); 4712 } 4713 4714 #undef __FUNCT__ 4715 #define __FUNCT__ "PCBDDCGlobalToLocal" 4716 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4717 { 4718 IS localis_t; 4719 PetscInt i,lsize,*idxs,n; 4720 PetscScalar *vals; 4721 PetscErrorCode ierr; 4722 4723 PetscFunctionBegin; 4724 /* get indices in local ordering exploiting local to global map */ 4725 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4726 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4727 for (i=0;i<lsize;i++) vals[i] = 1.0; 4728 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4729 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4730 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4731 if (idxs) { /* multilevel guard */ 4732 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4733 } 4734 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4735 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4736 ierr = PetscFree(vals);CHKERRQ(ierr); 4737 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4738 /* now compute set in local ordering */ 4739 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4740 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4741 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4742 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4743 for (i=0,lsize=0;i<n;i++) { 4744 if (PetscRealPart(vals[i]) > 0.5) { 4745 lsize++; 4746 } 4747 } 4748 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4749 for (i=0,lsize=0;i<n;i++) { 4750 if (PetscRealPart(vals[i]) > 0.5) { 4751 idxs[lsize++] = i; 4752 } 4753 } 4754 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4755 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4756 *localis = localis_t; 4757 PetscFunctionReturn(0); 4758 } 4759 4760 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4761 #undef __FUNCT__ 4762 #define __FUNCT__ "PCBDDCMatMult_Private" 4763 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4764 { 4765 PCBDDCChange_ctx change_ctx; 4766 PetscErrorCode ierr; 4767 4768 PetscFunctionBegin; 4769 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4770 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4771 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4772 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4773 PetscFunctionReturn(0); 4774 } 4775 4776 #undef __FUNCT__ 4777 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4778 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4779 { 4780 PCBDDCChange_ctx change_ctx; 4781 PetscErrorCode ierr; 4782 4783 PetscFunctionBegin; 4784 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4785 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4786 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4787 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4788 PetscFunctionReturn(0); 4789 } 4790 4791 #undef __FUNCT__ 4792 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4793 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4794 { 4795 PC_IS *pcis=(PC_IS*)pc->data; 4796 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4797 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4798 Mat S_j; 4799 PetscInt *used_xadj,*used_adjncy; 4800 PetscBool free_used_adj; 4801 PetscErrorCode ierr; 4802 4803 PetscFunctionBegin; 4804 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4805 free_used_adj = PETSC_FALSE; 4806 if (pcbddc->sub_schurs_layers == -1) { 4807 used_xadj = NULL; 4808 used_adjncy = NULL; 4809 } else { 4810 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4811 used_xadj = pcbddc->mat_graph->xadj; 4812 used_adjncy = pcbddc->mat_graph->adjncy; 4813 } else if (pcbddc->computed_rowadj) { 4814 used_xadj = pcbddc->mat_graph->xadj; 4815 used_adjncy = pcbddc->mat_graph->adjncy; 4816 } else { 4817 PetscBool flg_row=PETSC_FALSE; 4818 const PetscInt *xadj,*adjncy; 4819 PetscInt nvtxs; 4820 4821 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4822 if (flg_row) { 4823 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4824 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4825 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4826 free_used_adj = PETSC_TRUE; 4827 } else { 4828 pcbddc->sub_schurs_layers = -1; 4829 used_xadj = NULL; 4830 used_adjncy = NULL; 4831 } 4832 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4833 } 4834 } 4835 4836 /* setup sub_schurs data */ 4837 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4838 if (!sub_schurs->use_mumps) { 4839 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4840 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4841 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);CHKERRQ(ierr); 4842 } else { 4843 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4844 PetscBool isseqaij; 4845 if (!pcbddc->use_vertices && reuse_solvers) { 4846 PetscInt n_vertices; 4847 4848 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4849 reuse_solvers = (PetscBool)!n_vertices; 4850 } 4851 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4852 if (!isseqaij) { 4853 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4854 if (matis->A == pcbddc->local_mat) { 4855 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4856 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4857 } else { 4858 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4859 } 4860 } 4861 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);CHKERRQ(ierr); 4862 } 4863 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4864 4865 /* free adjacency */ 4866 if (free_used_adj) { 4867 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4868 } 4869 PetscFunctionReturn(0); 4870 } 4871 4872 #undef __FUNCT__ 4873 #define __FUNCT__ "PCBDDCInitSubSchurs" 4874 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4875 { 4876 PC_IS *pcis=(PC_IS*)pc->data; 4877 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4878 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4879 PCBDDCGraph graph; 4880 PetscErrorCode ierr; 4881 4882 PetscFunctionBegin; 4883 /* attach interface graph for determining subsets */ 4884 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4885 IS verticesIS,verticescomm; 4886 PetscInt vsize,*idxs; 4887 4888 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4889 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4890 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4891 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4892 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4893 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4894 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4895 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4896 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4897 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4898 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4899 /* 4900 if (pcbddc->dbg_flag) { 4901 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4902 } 4903 */ 4904 } else { 4905 graph = pcbddc->mat_graph; 4906 } 4907 4908 /* sub_schurs init */ 4909 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4910 4911 /* free graph struct */ 4912 if (pcbddc->sub_schurs_rebuild) { 4913 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4914 } 4915 PetscFunctionReturn(0); 4916 } 4917