1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 8 9 /* if range is true, it returns B s.t. span{B} = range(A) 10 if range is false, it returns B s.t. range(B) _|_ range(A) */ 11 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 12 { 13 #if !defined(PETSC_USE_COMPLEX) 14 PetscScalar *uwork,*data,*U, ds = 0.; 15 PetscReal *sing; 16 PetscBLASInt bM,bN,lwork,lierr,di = 1; 17 PetscInt ulw,i,nr,nc,n; 18 PetscErrorCode ierr; 19 20 PetscFunctionBegin; 21 #if defined(PETSC_MISSING_LAPACK_GESVD) 22 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 23 #else 24 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 25 if (!nr || !nc) PetscFunctionReturn(0); 26 27 /* workspace */ 28 if (!work) { 29 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 30 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 31 } else { 32 ulw = lw; 33 uwork = work; 34 } 35 n = PetscMin(nr,nc); 36 if (!rwork) { 37 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 38 } else { 39 sing = rwork; 40 } 41 42 /* SVD */ 43 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 44 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 47 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 48 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 49 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 50 ierr = PetscFPTrapPop();CHKERRQ(ierr); 51 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 52 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 53 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 54 if (!rwork) { 55 ierr = PetscFree(sing);CHKERRQ(ierr); 56 } 57 if (!work) { 58 ierr = PetscFree(uwork);CHKERRQ(ierr); 59 } 60 /* create B */ 61 if (!range) { 62 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 63 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 65 } else { 66 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 67 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 68 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 69 } 70 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 71 ierr = PetscFree(U);CHKERRQ(ierr); 72 #endif 73 #else /* PETSC_USE_COMPLEX */ 74 PetscFunctionBegin; 75 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 76 #endif 77 PetscFunctionReturn(0); 78 } 79 80 /* TODO REMOVE */ 81 #if defined(PRINT_GDET) 82 static int inc = 0; 83 static int lev = 0; 84 #endif 85 86 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 87 { 88 PetscErrorCode ierr; 89 Mat GE,GEd; 90 PetscInt rsize,csize,esize; 91 PetscScalar *ptr; 92 93 PetscFunctionBegin; 94 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 95 if (!esize) PetscFunctionReturn(0); 96 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 97 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 98 99 /* gradients */ 100 ptr = work + 5*esize; 101 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 102 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 103 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 104 ierr = MatDestroy(&GE);CHKERRQ(ierr); 105 106 /* constants */ 107 ptr += rsize*csize; 108 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 109 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 110 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 111 ierr = MatDestroy(&GE);CHKERRQ(ierr); 112 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 113 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 114 115 if (corners) { 116 Mat GEc; 117 PetscScalar *vals,v; 118 119 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 120 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 121 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 122 /* v = PetscAbsScalar(vals[0]) */; 123 v = 1.; 124 cvals[0] = vals[0]/v; 125 cvals[1] = vals[1]/v; 126 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 127 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 128 #if defined(PRINT_GDET) 129 { 130 PetscViewer viewer; 131 char filename[256]; 132 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 133 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 134 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 135 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 136 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 137 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 138 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 140 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 141 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 142 } 143 #endif 144 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 145 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 146 } 147 148 PetscFunctionReturn(0); 149 } 150 151 PetscErrorCode PCBDDCNedelecSupport(PC pc) 152 { 153 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 154 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 155 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 156 Vec tvec; 157 PetscSF sfv; 158 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 159 MPI_Comm comm; 160 IS lned,primals,allprimals,nedfieldlocal; 161 IS *eedges,*extrows,*extcols,*alleedges; 162 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 163 PetscScalar *vals,*work; 164 PetscReal *rwork; 165 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 166 PetscInt ne,nv,Lv,order,n,field; 167 PetscInt n_neigh,*neigh,*n_shared,**shared; 168 PetscInt i,j,extmem,cum,maxsize,nee; 169 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 170 PetscInt *sfvleaves,*sfvroots; 171 PetscInt *corners,*cedges; 172 PetscInt *ecount,**eneighs,*vcount,**vneighs; 173 #if defined(PETSC_USE_DEBUG) 174 PetscInt *emarks; 175 #endif 176 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 177 PetscErrorCode ierr; 178 179 PetscFunctionBegin; 180 /* If the discrete gradient is defined for a subset of dofs and global is true, 181 it assumes G is given in global ordering for all the dofs. 182 Otherwise, the ordering is global for the Nedelec field */ 183 order = pcbddc->nedorder; 184 conforming = pcbddc->conforming; 185 field = pcbddc->nedfield; 186 global = pcbddc->nedglobal; 187 setprimal = PETSC_FALSE; 188 print = PETSC_FALSE; 189 singular = PETSC_FALSE; 190 191 /* Command line customization */ 192 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 193 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 194 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 195 ierr = PetscOptionsInt ("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 196 /* print debug info TODO: to be removed */ 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsEnd();CHKERRQ(ierr); 199 200 /* Return if there are no edges in the decomposition and the problem is not singular */ 201 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 202 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 203 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 204 if (!singular) { 205 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 206 lrc[0] = PETSC_FALSE; 207 for (i=0;i<n;i++) { 208 if (PetscRealPart(vals[i]) > 2.) { 209 lrc[0] = PETSC_TRUE; 210 break; 211 } 212 } 213 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 214 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 215 if (!lrc[1]) PetscFunctionReturn(0); 216 } 217 218 /* Get Nedelec field */ 219 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 220 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 221 if (pcbddc->n_ISForDofsLocal && field >= 0) { 222 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 223 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 224 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 225 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 226 ne = n; 227 nedfieldlocal = NULL; 228 global = PETSC_TRUE; 229 } else if (field == PETSC_DECIDE) { 230 PetscInt rst,ren,*idx; 231 232 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 233 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 234 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 235 for (i=rst;i<ren;i++) { 236 PetscInt nc; 237 238 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 239 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 240 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 241 } 242 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 243 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 244 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 245 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 246 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 247 } else { 248 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 249 } 250 251 /* Sanity checks */ 252 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 253 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 254 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 255 256 /* Just set primal dofs and return */ 257 if (setprimal) { 258 IS enedfieldlocal; 259 PetscInt *eidxs; 260 261 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 262 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 263 if (nedfieldlocal) { 264 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 265 for (i=0,cum=0;i<ne;i++) { 266 if (PetscRealPart(vals[idxs[i]]) > 2.) { 267 eidxs[cum++] = idxs[i]; 268 } 269 } 270 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 271 } else { 272 for (i=0,cum=0;i<ne;i++) { 273 if (PetscRealPart(vals[i]) > 2.) { 274 eidxs[cum++] = i; 275 } 276 } 277 } 278 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 279 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 280 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 281 ierr = PetscFree(eidxs);CHKERRQ(ierr); 282 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 283 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 /* Compute some l2g maps */ 288 if (nedfieldlocal) { 289 IS is; 290 291 /* need to map from the local Nedelec field to local numbering */ 292 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 293 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 294 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 295 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 296 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 297 if (global) { 298 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 299 el2g = al2g; 300 } else { 301 IS gis; 302 303 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 304 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 305 ierr = ISDestroy(&gis);CHKERRQ(ierr); 306 } 307 ierr = ISDestroy(&is);CHKERRQ(ierr); 308 } else { 309 /* restore default */ 310 pcbddc->nedfield = -1; 311 /* one ref for the destruction of al2g, one for el2g */ 312 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 313 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 314 el2g = al2g; 315 fl2g = NULL; 316 } 317 318 /* Start communication to drop connections for interior edges (for cc analysis only) */ 319 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 320 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 321 if (nedfieldlocal) { 322 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 323 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 324 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 325 } else { 326 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 327 } 328 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 329 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 330 331 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 332 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 333 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 334 if (global) { 335 PetscInt rst; 336 337 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 338 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 339 if (matis->sf_rootdata[i] < 2) { 340 matis->sf_rootdata[cum++] = i + rst; 341 } 342 } 343 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 344 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 345 } else { 346 PetscInt *tbz; 347 348 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 349 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 350 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 351 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 352 for (i=0,cum=0;i<ne;i++) 353 if (matis->sf_leafdata[idxs[i]] == 1) 354 tbz[cum++] = i; 355 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 356 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 357 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 358 ierr = PetscFree(tbz);CHKERRQ(ierr); 359 } 360 } else { /* we need the entire G to infer the nullspace */ 361 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 362 G = pcbddc->discretegradient; 363 } 364 365 /* Extract subdomain relevant rows of G */ 366 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 367 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 368 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 369 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 370 ierr = ISDestroy(&lned);CHKERRQ(ierr); 371 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 372 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 373 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 374 375 /* SF for nodal dofs communications */ 376 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 377 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 378 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 379 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 380 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 381 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 382 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 384 i = singular ? 2 : 1; 385 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 386 387 /* Destroy temporary G created in MATIS format and modified G */ 388 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 389 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 390 ierr = MatDestroy(&G);CHKERRQ(ierr); 391 392 if (print) { 393 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 394 ierr = MatView(lG,NULL);CHKERRQ(ierr); 395 } 396 397 /* Save lG for values insertion in change of basis */ 398 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 399 400 /* Analyze the edge-nodes connections (duplicate lG) */ 401 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 402 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 403 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 404 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 405 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 406 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 407 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 408 /* need to import the boundary specification to ensure the 409 proper detection of coarse edges' endpoints */ 410 if (pcbddc->DirichletBoundariesLocal) { 411 IS is; 412 413 if (fl2g) { 414 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 415 } else { 416 is = pcbddc->DirichletBoundariesLocal; 417 } 418 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 419 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 420 for (i=0;i<cum;i++) { 421 if (idxs[i] >= 0) { 422 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 423 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 424 } 425 } 426 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 427 if (fl2g) { 428 ierr = ISDestroy(&is);CHKERRQ(ierr); 429 } 430 } 431 if (pcbddc->NeumannBoundariesLocal) { 432 IS is; 433 434 if (fl2g) { 435 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 436 } else { 437 is = pcbddc->NeumannBoundariesLocal; 438 } 439 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 440 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 441 for (i=0;i<cum;i++) { 442 if (idxs[i] >= 0) { 443 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 444 } 445 } 446 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 447 if (fl2g) { 448 ierr = ISDestroy(&is);CHKERRQ(ierr); 449 } 450 } 451 452 /* Count neighs per dof */ 453 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 454 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 455 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 456 for (i=1,cum=0;i<n_neigh;i++) { 457 cum += n_shared[i]; 458 for (j=0;j<n_shared[i];j++) { 459 ecount[shared[i][j]]++; 460 } 461 } 462 if (ne) { 463 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 464 } 465 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 466 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 467 for (i=1;i<n_neigh;i++) { 468 for (j=0;j<n_shared[i];j++) { 469 PetscInt k = shared[i][j]; 470 eneighs[k][ecount[k]] = neigh[i]; 471 ecount[k]++; 472 } 473 } 474 for (i=0;i<ne;i++) { 475 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 476 } 477 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 478 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 479 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 480 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 481 for (i=1,cum=0;i<n_neigh;i++) { 482 cum += n_shared[i]; 483 for (j=0;j<n_shared[i];j++) { 484 vcount[shared[i][j]]++; 485 } 486 } 487 if (nv) { 488 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 489 } 490 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 491 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 492 for (i=1;i<n_neigh;i++) { 493 for (j=0;j<n_shared[i];j++) { 494 PetscInt k = shared[i][j]; 495 vneighs[k][vcount[k]] = neigh[i]; 496 vcount[k]++; 497 } 498 } 499 for (i=0;i<nv;i++) { 500 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 501 } 502 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 503 504 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 505 for proper detection of coarse edges' endpoints */ 506 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 507 for (i=0;i<ne;i++) { 508 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 509 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 510 } 511 } 512 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 513 if (!conforming) { 514 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 515 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 516 } 517 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 518 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 519 cum = 0; 520 for (i=0;i<ne;i++) { 521 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 522 if (!PetscBTLookup(btee,i)) { 523 marks[cum++] = i; 524 continue; 525 } 526 /* set badly connected edge dofs as primal */ 527 if (!conforming) { 528 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 529 marks[cum++] = i; 530 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 531 for (j=ii[i];j<ii[i+1];j++) { 532 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 533 } 534 } else { 535 /* every edge dofs should be connected trough a certain number of nodal dofs 536 to other edge dofs belonging to coarse edges 537 - at most 2 endpoints 538 - order-1 interior nodal dofs 539 - no undefined nodal dofs (nconn < order) 540 */ 541 PetscInt ends = 0,ints = 0, undef = 0; 542 for (j=ii[i];j<ii[i+1];j++) { 543 PetscInt v = jj[j],k; 544 PetscInt nconn = iit[v+1]-iit[v]; 545 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 546 if (nconn > order) ends++; 547 else if (nconn == order) ints++; 548 else undef++; 549 } 550 if (undef || ends > 2 || ints != order -1) { 551 marks[cum++] = i; 552 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 553 for (j=ii[i];j<ii[i+1];j++) { 554 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 555 } 556 } 557 } 558 } 559 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 560 if (!order && ii[i+1] != ii[i]) { 561 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 562 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 563 } 564 } 565 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 566 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 567 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 568 if (!conforming) { 569 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 570 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 571 } 572 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 573 574 /* identify splitpoints and corner candidates */ 575 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 576 if (print) { 577 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 578 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 579 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 580 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 581 } 582 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 583 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 584 for (i=0;i<nv;i++) { 585 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 586 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 587 if (!order) { /* variable order */ 588 PetscReal vorder = 0.; 589 590 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 591 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 592 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 593 ord = 1; 594 } 595 #if defined(PETSC_USE_DEBUG) 596 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 597 #endif 598 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 599 if (PetscBTLookup(btbd,jj[j])) { 600 bdir = PETSC_TRUE; 601 break; 602 } 603 if (vc != ecount[jj[j]]) { 604 sneighs = PETSC_FALSE; 605 } else { 606 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 607 for (k=0;k<vc;k++) { 608 if (vn[k] != en[k]) { 609 sneighs = PETSC_FALSE; 610 break; 611 } 612 } 613 } 614 } 615 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 616 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 617 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 618 } else if (test == ord) { 619 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 620 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 621 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 622 } else { 623 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 624 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 625 } 626 } 627 } 628 ierr = PetscFree(ecount);CHKERRQ(ierr); 629 ierr = PetscFree(vcount);CHKERRQ(ierr); 630 if (ne) { 631 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 632 } 633 if (nv) { 634 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 635 } 636 ierr = PetscFree(eneighs);CHKERRQ(ierr); 637 ierr = PetscFree(vneighs);CHKERRQ(ierr); 638 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 639 640 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 641 if (order != 1) { 642 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 643 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 644 for (i=0;i<nv;i++) { 645 if (PetscBTLookup(btvcand,i)) { 646 PetscBool found = PETSC_FALSE; 647 for (j=ii[i];j<ii[i+1] && !found;j++) { 648 PetscInt k,e = jj[j]; 649 if (PetscBTLookup(bte,e)) continue; 650 for (k=iit[e];k<iit[e+1];k++) { 651 PetscInt v = jjt[k]; 652 if (v != i && PetscBTLookup(btvcand,v)) { 653 found = PETSC_TRUE; 654 break; 655 } 656 } 657 } 658 if (!found) { 659 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 660 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 661 } else { 662 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 663 } 664 } 665 } 666 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 667 } 668 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 669 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 670 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 671 672 /* Get the local G^T explicitly */ 673 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 674 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 675 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 676 677 /* Mark interior nodal dofs */ 678 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 679 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 680 for (i=1;i<n_neigh;i++) { 681 for (j=0;j<n_shared[i];j++) { 682 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 683 } 684 } 685 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 686 687 /* communicate corners and splitpoints */ 688 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 689 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 690 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 691 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 692 693 if (print) { 694 IS tbz; 695 696 cum = 0; 697 for (i=0;i<nv;i++) 698 if (sfvleaves[i]) 699 vmarks[cum++] = i; 700 701 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 702 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 703 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 704 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 705 } 706 707 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 708 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 709 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 710 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 711 712 /* Zero rows of lGt corresponding to identified corners 713 and interior nodal dofs */ 714 cum = 0; 715 for (i=0;i<nv;i++) { 716 if (sfvleaves[i]) { 717 vmarks[cum++] = i; 718 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 719 } 720 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 721 } 722 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 723 if (print) { 724 IS tbz; 725 726 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 727 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 728 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 729 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 730 } 731 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 732 ierr = PetscFree(vmarks);CHKERRQ(ierr); 733 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 734 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 735 736 /* Recompute G */ 737 ierr = MatDestroy(&lG);CHKERRQ(ierr); 738 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 739 if (print) { 740 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 741 ierr = MatView(lG,NULL);CHKERRQ(ierr); 742 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 743 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 744 } 745 746 /* Get primal dofs (if any) */ 747 cum = 0; 748 for (i=0;i<ne;i++) { 749 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 750 } 751 if (fl2g) { 752 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 753 } 754 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 755 if (print) { 756 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 757 ierr = ISView(primals,NULL);CHKERRQ(ierr); 758 } 759 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 760 /* TODO: what if the user passed in some of them ? */ 761 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 762 ierr = ISDestroy(&primals);CHKERRQ(ierr); 763 764 /* Compute edge connectivity */ 765 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 766 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 767 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 768 if (fl2g) { 769 PetscBT btf; 770 PetscInt *iia,*jja,*iiu,*jju; 771 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 772 773 /* create CSR for all local dofs */ 774 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 775 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 776 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 777 iiu = pcbddc->mat_graph->xadj; 778 jju = pcbddc->mat_graph->adjncy; 779 } else if (pcbddc->use_local_adj) { 780 rest = PETSC_TRUE; 781 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 782 } else { 783 free = PETSC_TRUE; 784 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 785 iiu[0] = 0; 786 for (i=0;i<n;i++) { 787 iiu[i+1] = i+1; 788 jju[i] = -1; 789 } 790 } 791 792 /* import sizes of CSR */ 793 iia[0] = 0; 794 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 795 796 /* overwrite entries corresponding to the Nedelec field */ 797 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 798 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 799 for (i=0;i<ne;i++) { 800 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 801 iia[idxs[i]+1] = ii[i+1]-ii[i]; 802 } 803 804 /* iia in CSR */ 805 for (i=0;i<n;i++) iia[i+1] += iia[i]; 806 807 /* jja in CSR */ 808 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 809 for (i=0;i<n;i++) 810 if (!PetscBTLookup(btf,i)) 811 for (j=0;j<iiu[i+1]-iiu[i];j++) 812 jja[iia[i]+j] = jju[iiu[i]+j]; 813 814 /* map edge dofs connectivity */ 815 if (jj) { 816 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 817 for (i=0;i<ne;i++) { 818 PetscInt e = idxs[i]; 819 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 820 } 821 } 822 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 823 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 824 if (rest) { 825 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 826 } 827 if (free) { 828 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 829 } 830 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 831 } else { 832 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 833 } 834 835 /* Analyze interface for edge dofs */ 836 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 837 pcbddc->mat_graph->twodim = PETSC_FALSE; 838 839 /* Get coarse edges in the edge space */ 840 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 841 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 842 843 if (fl2g) { 844 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 845 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 846 for (i=0;i<nee;i++) { 847 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 848 } 849 } else { 850 eedges = alleedges; 851 primals = allprimals; 852 } 853 854 /* Mark fine edge dofs with their coarse edge id */ 855 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 856 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 857 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 858 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 859 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 860 if (print) { 861 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 862 ierr = ISView(primals,NULL);CHKERRQ(ierr); 863 } 864 865 maxsize = 0; 866 for (i=0;i<nee;i++) { 867 PetscInt size,mark = i+1; 868 869 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 870 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 871 for (j=0;j<size;j++) marks[idxs[j]] = mark; 872 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 873 maxsize = PetscMax(maxsize,size); 874 } 875 876 /* Find coarse edge endpoints */ 877 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 878 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 879 for (i=0;i<nee;i++) { 880 PetscInt mark = i+1,size; 881 882 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 883 if (!size && nedfieldlocal) continue; 884 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 885 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 886 if (print) { 887 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 888 ISView(eedges[i],NULL); 889 } 890 for (j=0;j<size;j++) { 891 PetscInt k, ee = idxs[j]; 892 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 893 for (k=ii[ee];k<ii[ee+1];k++) { 894 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 895 if (PetscBTLookup(btv,jj[k])) { 896 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 897 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 898 PetscInt k2; 899 PetscBool corner = PETSC_FALSE; 900 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 901 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 902 /* it's a corner if either is connected with an edge dof belonging to a different cc or 903 if the edge dof lie on the natural part of the boundary */ 904 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 905 corner = PETSC_TRUE; 906 break; 907 } 908 } 909 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 910 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 911 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 912 } else { 913 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 914 } 915 } 916 } 917 } 918 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 919 } 920 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 921 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 922 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 923 924 /* Reset marked primal dofs */ 925 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 926 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 927 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 928 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 929 930 /* Now use the initial lG */ 931 ierr = MatDestroy(&lG);CHKERRQ(ierr); 932 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 933 lG = lGinit; 934 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 935 936 /* Compute extended cols indices */ 937 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 938 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 939 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 940 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 941 i *= maxsize; 942 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 943 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 944 eerr = PETSC_FALSE; 945 for (i=0;i<nee;i++) { 946 PetscInt size,found = 0; 947 948 cum = 0; 949 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 950 if (!size && nedfieldlocal) continue; 951 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 952 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 953 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 954 for (j=0;j<size;j++) { 955 PetscInt k,ee = idxs[j]; 956 for (k=ii[ee];k<ii[ee+1];k++) { 957 PetscInt vv = jj[k]; 958 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 959 else if (!PetscBTLookupSet(btvc,vv)) found++; 960 } 961 } 962 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 963 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 964 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 965 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 966 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 967 /* it may happen that endpoints are not defined at this point 968 if it is the case, mark this edge for a second pass */ 969 if (cum != size -1 || found != 2) { 970 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 971 if (print) { 972 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 973 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 974 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 975 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 976 } 977 eerr = PETSC_TRUE; 978 } 979 } 980 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 981 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 982 if (done) { 983 PetscInt *newprimals; 984 985 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 986 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 987 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 988 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 989 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 990 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 991 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 992 for (i=0;i<nee;i++) { 993 PetscBool has_candidates = PETSC_FALSE; 994 if (PetscBTLookup(bter,i)) { 995 PetscInt size,mark = i+1; 996 997 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 998 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 999 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1000 for (j=0;j<size;j++) { 1001 PetscInt k,ee = idxs[j]; 1002 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1003 for (k=ii[ee];k<ii[ee+1];k++) { 1004 /* set all candidates located on the edge as corners */ 1005 if (PetscBTLookup(btvcand,jj[k])) { 1006 PetscInt k2,vv = jj[k]; 1007 has_candidates = PETSC_TRUE; 1008 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1009 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1010 /* set all edge dofs connected to candidate as primals */ 1011 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1012 if (marks[jjt[k2]] == mark) { 1013 PetscInt k3,ee2 = jjt[k2]; 1014 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1015 newprimals[cum++] = ee2; 1016 /* finally set the new corners */ 1017 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1018 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1019 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1020 } 1021 } 1022 } 1023 } else { 1024 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1025 } 1026 } 1027 } 1028 if (!has_candidates) { /* circular edge */ 1029 PetscInt k, ee = idxs[0],*tmarks; 1030 1031 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1032 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1033 for (k=ii[ee];k<ii[ee+1];k++) { 1034 PetscInt k2; 1035 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1036 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1037 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1038 } 1039 for (j=0;j<size;j++) { 1040 if (tmarks[idxs[j]] > 1) { 1041 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1042 newprimals[cum++] = idxs[j]; 1043 } 1044 } 1045 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1046 } 1047 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1048 } 1049 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1050 } 1051 ierr = PetscFree(extcols);CHKERRQ(ierr); 1052 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1053 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1054 if (fl2g) { 1055 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1056 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1057 for (i=0;i<nee;i++) { 1058 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1059 } 1060 ierr = PetscFree(eedges);CHKERRQ(ierr); 1061 } 1062 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1063 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1064 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1065 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1066 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1067 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1068 pcbddc->mat_graph->twodim = PETSC_FALSE; 1069 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1070 if (fl2g) { 1071 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1072 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1073 for (i=0;i<nee;i++) { 1074 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1075 } 1076 } else { 1077 eedges = alleedges; 1078 primals = allprimals; 1079 } 1080 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1081 1082 /* Mark again */ 1083 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1084 for (i=0;i<nee;i++) { 1085 PetscInt size,mark = i+1; 1086 1087 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1088 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1089 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1090 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1091 } 1092 if (print) { 1093 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1094 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1095 } 1096 1097 /* Recompute extended cols */ 1098 eerr = PETSC_FALSE; 1099 for (i=0;i<nee;i++) { 1100 PetscInt size; 1101 1102 cum = 0; 1103 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1104 if (!size && nedfieldlocal) continue; 1105 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1106 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1107 for (j=0;j<size;j++) { 1108 PetscInt k,ee = idxs[j]; 1109 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1110 } 1111 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1112 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1113 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1114 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1115 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1116 if (cum != size -1) { 1117 if (print) { 1118 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1119 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1120 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1121 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1122 } 1123 eerr = PETSC_TRUE; 1124 } 1125 } 1126 } 1127 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1128 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1129 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1130 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1131 /* an error should not occur at this point */ 1132 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1133 1134 /* Check the number of endpoints */ 1135 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1136 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1137 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1138 for (i=0;i<nee;i++) { 1139 PetscInt size, found = 0, gc[2]; 1140 1141 /* init with defaults */ 1142 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1143 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1144 if (!size && nedfieldlocal) continue; 1145 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1146 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1147 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1148 for (j=0;j<size;j++) { 1149 PetscInt k,ee = idxs[j]; 1150 for (k=ii[ee];k<ii[ee+1];k++) { 1151 PetscInt vv = jj[k]; 1152 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1153 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1154 corners[i*2+found++] = vv; 1155 } 1156 } 1157 } 1158 if (found != 2) { 1159 PetscInt e; 1160 if (fl2g) { 1161 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1162 } else { 1163 e = idxs[0]; 1164 } 1165 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1166 } 1167 1168 /* get primal dof index on this coarse edge */ 1169 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1170 if (gc[0] > gc[1]) { 1171 PetscInt swap = corners[2*i]; 1172 corners[2*i] = corners[2*i+1]; 1173 corners[2*i+1] = swap; 1174 } 1175 cedges[i] = idxs[size-1]; 1176 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1177 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1178 } 1179 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1180 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1181 1182 #if defined(PETSC_USE_DEBUG) 1183 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1184 not interfere with neighbouring coarse edges */ 1185 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1186 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1187 for (i=0;i<nv;i++) { 1188 PetscInt emax = 0,eemax = 0; 1189 1190 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1191 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1192 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1193 for (j=1;j<nee+1;j++) { 1194 if (emax < emarks[j]) { 1195 emax = emarks[j]; 1196 eemax = j; 1197 } 1198 } 1199 /* not relevant for edges */ 1200 if (!eemax) continue; 1201 1202 for (j=ii[i];j<ii[i+1];j++) { 1203 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1204 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1205 } 1206 } 1207 } 1208 ierr = PetscFree(emarks);CHKERRQ(ierr); 1209 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1210 #endif 1211 1212 /* Compute extended rows indices for edge blocks of the change of basis */ 1213 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1214 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1215 extmem *= maxsize; 1216 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1217 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1218 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1219 for (i=0;i<nv;i++) { 1220 PetscInt mark = 0,size,start; 1221 1222 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1223 for (j=ii[i];j<ii[i+1];j++) 1224 if (marks[jj[j]] && !mark) 1225 mark = marks[jj[j]]; 1226 1227 /* not relevant */ 1228 if (!mark) continue; 1229 1230 /* import extended row */ 1231 mark--; 1232 start = mark*extmem+extrowcum[mark]; 1233 size = ii[i+1]-ii[i]; 1234 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1235 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1236 extrowcum[mark] += size; 1237 } 1238 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1239 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1240 ierr = PetscFree(marks);CHKERRQ(ierr); 1241 1242 /* Compress extrows */ 1243 cum = 0; 1244 for (i=0;i<nee;i++) { 1245 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1246 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1247 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1248 cum = PetscMax(cum,size); 1249 } 1250 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1251 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1252 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1253 1254 /* Workspace for lapack inner calls and VecSetValues */ 1255 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1256 1257 /* Create change of basis matrix (preallocation can be improved) */ 1258 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1259 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1260 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1261 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1262 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1263 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1264 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1265 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1266 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1267 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1268 1269 /* Defaults to identity */ 1270 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1271 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1272 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1273 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1274 1275 /* Create discrete gradient for the coarser level if needed */ 1276 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1277 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1278 if (pcbddc->current_level < pcbddc->max_levels) { 1279 ISLocalToGlobalMapping cel2g,cvl2g; 1280 IS wis,gwis; 1281 PetscInt cnv,cne; 1282 1283 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1284 if (fl2g) { 1285 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1286 } else { 1287 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1288 pcbddc->nedclocal = wis; 1289 } 1290 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1291 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1292 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1293 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1294 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1295 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1296 1297 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1298 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1299 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1300 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1301 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1302 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1303 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1304 1305 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1306 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1307 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1308 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1309 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1310 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1311 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1312 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1313 } 1314 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1315 1316 #if defined(PRINT_GDET) 1317 inc = 0; 1318 lev = pcbddc->current_level; 1319 #endif 1320 1321 /* Insert values in the change of basis matrix */ 1322 for (i=0;i<nee;i++) { 1323 Mat Gins = NULL, GKins = NULL; 1324 IS cornersis = NULL; 1325 PetscScalar cvals[2]; 1326 1327 if (pcbddc->nedcG) { 1328 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1329 } 1330 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1331 if (Gins && GKins) { 1332 PetscScalar *data; 1333 const PetscInt *rows,*cols; 1334 PetscInt nrh,nch,nrc,ncc; 1335 1336 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1337 /* H1 */ 1338 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1339 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1340 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1341 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1342 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1343 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1344 /* complement */ 1345 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1346 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1347 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1348 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1349 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1350 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1351 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1352 1353 /* coarse discrete gradient */ 1354 if (pcbddc->nedcG) { 1355 PetscInt cols[2]; 1356 1357 cols[0] = 2*i; 1358 cols[1] = 2*i+1; 1359 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1360 } 1361 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1362 } 1363 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1364 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1365 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1366 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1367 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1368 } 1369 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1370 1371 /* Start assembling */ 1372 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1373 if (pcbddc->nedcG) { 1374 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1375 } 1376 1377 /* Free */ 1378 if (fl2g) { 1379 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1380 for (i=0;i<nee;i++) { 1381 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1382 } 1383 ierr = PetscFree(eedges);CHKERRQ(ierr); 1384 } 1385 1386 /* hack mat_graph with primal dofs on the coarse edges */ 1387 { 1388 PCBDDCGraph graph = pcbddc->mat_graph; 1389 PetscInt *oqueue = graph->queue; 1390 PetscInt *ocptr = graph->cptr; 1391 PetscInt ncc,*idxs; 1392 1393 /* find first primal edge */ 1394 if (pcbddc->nedclocal) { 1395 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1396 } else { 1397 if (fl2g) { 1398 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1399 } 1400 idxs = cedges; 1401 } 1402 cum = 0; 1403 while (cum < nee && cedges[cum] < 0) cum++; 1404 1405 /* adapt connected components */ 1406 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1407 graph->cptr[0] = 0; 1408 for (i=0,ncc=0;i<graph->ncc;i++) { 1409 PetscInt lc = ocptr[i+1]-ocptr[i]; 1410 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1411 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1412 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1413 ncc++; 1414 lc--; 1415 cum++; 1416 while (cum < nee && cedges[cum] < 0) cum++; 1417 } 1418 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1419 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1420 ncc++; 1421 } 1422 graph->ncc = ncc; 1423 if (pcbddc->nedclocal) { 1424 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1425 } 1426 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1427 } 1428 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1429 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1430 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1431 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1432 1433 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1434 ierr = PetscFree(extrow);CHKERRQ(ierr); 1435 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1436 ierr = PetscFree(corners);CHKERRQ(ierr); 1437 ierr = PetscFree(cedges);CHKERRQ(ierr); 1438 ierr = PetscFree(extrows);CHKERRQ(ierr); 1439 ierr = PetscFree(extcols);CHKERRQ(ierr); 1440 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1441 1442 /* Complete assembling */ 1443 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1444 if (pcbddc->nedcG) { 1445 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1446 #if 0 1447 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1448 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1449 #endif 1450 } 1451 1452 /* set change of basis */ 1453 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1454 ierr = MatDestroy(&T);CHKERRQ(ierr); 1455 1456 PetscFunctionReturn(0); 1457 } 1458 1459 /* the near-null space of BDDC carries information on quadrature weights, 1460 and these can be collinear -> so cheat with MatNullSpaceCreate 1461 and create a suitable set of basis vectors first */ 1462 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1463 { 1464 PetscErrorCode ierr; 1465 PetscInt i; 1466 1467 PetscFunctionBegin; 1468 for (i=0;i<nvecs;i++) { 1469 PetscInt first,last; 1470 1471 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1472 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1473 if (i>=first && i < last) { 1474 PetscScalar *data; 1475 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1476 if (!has_const) { 1477 data[i-first] = 1.; 1478 } else { 1479 data[2*i-first] = 1./PetscSqrtReal(2.); 1480 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1481 } 1482 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1483 } 1484 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1485 } 1486 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1487 for (i=0;i<nvecs;i++) { /* reset vectors */ 1488 PetscInt first,last; 1489 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1490 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1491 if (i>=first && i < last) { 1492 PetscScalar *data; 1493 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1494 if (!has_const) { 1495 data[i-first] = 0.; 1496 } else { 1497 data[2*i-first] = 0.; 1498 data[2*i-first+1] = 0.; 1499 } 1500 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1501 } 1502 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1503 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1504 } 1505 PetscFunctionReturn(0); 1506 } 1507 1508 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1509 { 1510 Mat loc_divudotp; 1511 Vec p,v,vins,quad_vec,*quad_vecs; 1512 ISLocalToGlobalMapping map; 1513 IS *faces,*edges; 1514 PetscScalar *vals; 1515 const PetscScalar *array; 1516 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1517 PetscMPIInt rank; 1518 PetscErrorCode ierr; 1519 1520 PetscFunctionBegin; 1521 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1522 if (graph->twodim) { 1523 lmaxneighs = 2; 1524 } else { 1525 lmaxneighs = 1; 1526 for (i=0;i<ne;i++) { 1527 const PetscInt *idxs; 1528 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1529 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1530 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1531 } 1532 lmaxneighs++; /* graph count does not include self */ 1533 } 1534 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1535 maxsize = 0; 1536 for (i=0;i<ne;i++) { 1537 PetscInt nn; 1538 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1539 maxsize = PetscMax(maxsize,nn); 1540 } 1541 for (i=0;i<nf;i++) { 1542 PetscInt nn; 1543 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1544 maxsize = PetscMax(maxsize,nn); 1545 } 1546 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1547 /* create vectors to hold quadrature weights */ 1548 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1549 if (!transpose) { 1550 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1551 } else { 1552 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1553 } 1554 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1555 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1556 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1557 for (i=0;i<maxneighs;i++) { 1558 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1559 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1560 } 1561 1562 /* compute local quad vec */ 1563 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1564 if (!transpose) { 1565 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1566 } else { 1567 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1568 } 1569 ierr = VecSet(p,1.);CHKERRQ(ierr); 1570 if (!transpose) { 1571 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1572 } else { 1573 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1574 } 1575 if (vl2l) { 1576 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1577 } else { 1578 vins = v; 1579 } 1580 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1581 ierr = VecDestroy(&p);CHKERRQ(ierr); 1582 1583 /* insert in global quadrature vecs */ 1584 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1585 for (i=0;i<nf;i++) { 1586 const PetscInt *idxs; 1587 PetscInt idx,nn,j; 1588 1589 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1590 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1591 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1592 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1593 idx = -(idx+1); 1594 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1595 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1596 } 1597 for (i=0;i<ne;i++) { 1598 const PetscInt *idxs; 1599 PetscInt idx,nn,j; 1600 1601 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1602 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1603 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1604 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1605 idx = -(idx+1); 1606 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1607 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1608 } 1609 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1610 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1611 if (vl2l) { 1612 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1613 } 1614 ierr = VecDestroy(&v);CHKERRQ(ierr); 1615 ierr = PetscFree(vals);CHKERRQ(ierr); 1616 1617 /* assemble near null space */ 1618 for (i=0;i<maxneighs;i++) { 1619 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1620 } 1621 for (i=0;i<maxneighs;i++) { 1622 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1623 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1624 } 1625 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1626 PetscFunctionReturn(0); 1627 } 1628 1629 1630 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1631 { 1632 PetscErrorCode ierr; 1633 Vec local,global; 1634 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1635 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1636 1637 PetscFunctionBegin; 1638 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1639 /* need to convert from global to local topology information and remove references to information in global ordering */ 1640 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1641 if (pcbddc->user_provided_isfordofs) { 1642 if (pcbddc->n_ISForDofs) { 1643 PetscInt i; 1644 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1645 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1646 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1647 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1648 } 1649 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1650 pcbddc->n_ISForDofs = 0; 1651 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1652 } 1653 } else { 1654 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1655 PetscInt i, n = matis->A->rmap->n; 1656 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1657 if (i > 1) { 1658 pcbddc->n_ISForDofsLocal = i; 1659 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1660 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1661 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1662 } 1663 } 1664 } else { 1665 PetscInt i; 1666 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1667 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1668 } 1669 } 1670 } 1671 1672 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1673 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1674 } else if (pcbddc->DirichletBoundariesLocal) { 1675 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1676 } 1677 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1678 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1679 } else if (pcbddc->NeumannBoundariesLocal) { 1680 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1681 } 1682 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1683 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1684 } 1685 ierr = VecDestroy(&global);CHKERRQ(ierr); 1686 ierr = VecDestroy(&local);CHKERRQ(ierr); 1687 1688 PetscFunctionReturn(0); 1689 } 1690 1691 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1692 { 1693 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1694 PetscErrorCode ierr; 1695 IS nis; 1696 const PetscInt *idxs; 1697 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1698 PetscBool *ld; 1699 1700 PetscFunctionBegin; 1701 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1702 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1703 if (mop == MPI_LAND) { 1704 /* init rootdata with true */ 1705 ld = (PetscBool*) matis->sf_rootdata; 1706 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1707 } else { 1708 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1709 } 1710 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1711 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1712 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1713 ld = (PetscBool*) matis->sf_leafdata; 1714 for (i=0;i<nd;i++) 1715 if (-1 < idxs[i] && idxs[i] < n) 1716 ld[idxs[i]] = PETSC_TRUE; 1717 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1718 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1719 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1720 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1721 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1722 if (mop == MPI_LAND) { 1723 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1724 } else { 1725 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1726 } 1727 for (i=0,nnd=0;i<n;i++) 1728 if (ld[i]) 1729 nidxs[nnd++] = i; 1730 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1731 ierr = ISDestroy(is);CHKERRQ(ierr); 1732 *is = nis; 1733 PetscFunctionReturn(0); 1734 } 1735 1736 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1737 { 1738 PC_IS *pcis = (PC_IS*)(pc->data); 1739 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1740 PetscErrorCode ierr; 1741 1742 PetscFunctionBegin; 1743 if (!pcbddc->benign_have_null) { 1744 PetscFunctionReturn(0); 1745 } 1746 if (pcbddc->ChangeOfBasisMatrix) { 1747 Vec swap; 1748 1749 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1750 swap = pcbddc->work_change; 1751 pcbddc->work_change = r; 1752 r = swap; 1753 } 1754 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1755 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1756 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1757 ierr = VecSet(z,0.);CHKERRQ(ierr); 1758 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1759 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1760 if (pcbddc->ChangeOfBasisMatrix) { 1761 pcbddc->work_change = r; 1762 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1763 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1764 } 1765 PetscFunctionReturn(0); 1766 } 1767 1768 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1769 { 1770 PCBDDCBenignMatMult_ctx ctx; 1771 PetscErrorCode ierr; 1772 PetscBool apply_right,apply_left,reset_x; 1773 1774 PetscFunctionBegin; 1775 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1776 if (transpose) { 1777 apply_right = ctx->apply_left; 1778 apply_left = ctx->apply_right; 1779 } else { 1780 apply_right = ctx->apply_right; 1781 apply_left = ctx->apply_left; 1782 } 1783 reset_x = PETSC_FALSE; 1784 if (apply_right) { 1785 const PetscScalar *ax; 1786 PetscInt nl,i; 1787 1788 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1789 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1790 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1791 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1792 for (i=0;i<ctx->benign_n;i++) { 1793 PetscScalar sum,val; 1794 const PetscInt *idxs; 1795 PetscInt nz,j; 1796 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1797 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1798 sum = 0.; 1799 if (ctx->apply_p0) { 1800 val = ctx->work[idxs[nz-1]]; 1801 for (j=0;j<nz-1;j++) { 1802 sum += ctx->work[idxs[j]]; 1803 ctx->work[idxs[j]] += val; 1804 } 1805 } else { 1806 for (j=0;j<nz-1;j++) { 1807 sum += ctx->work[idxs[j]]; 1808 } 1809 } 1810 ctx->work[idxs[nz-1]] -= sum; 1811 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1812 } 1813 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1814 reset_x = PETSC_TRUE; 1815 } 1816 if (transpose) { 1817 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1818 } else { 1819 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1820 } 1821 if (reset_x) { 1822 ierr = VecResetArray(x);CHKERRQ(ierr); 1823 } 1824 if (apply_left) { 1825 PetscScalar *ay; 1826 PetscInt i; 1827 1828 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1829 for (i=0;i<ctx->benign_n;i++) { 1830 PetscScalar sum,val; 1831 const PetscInt *idxs; 1832 PetscInt nz,j; 1833 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1834 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1835 val = -ay[idxs[nz-1]]; 1836 if (ctx->apply_p0) { 1837 sum = 0.; 1838 for (j=0;j<nz-1;j++) { 1839 sum += ay[idxs[j]]; 1840 ay[idxs[j]] += val; 1841 } 1842 ay[idxs[nz-1]] += sum; 1843 } else { 1844 for (j=0;j<nz-1;j++) { 1845 ay[idxs[j]] += val; 1846 } 1847 ay[idxs[nz-1]] = 0.; 1848 } 1849 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1850 } 1851 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1852 } 1853 PetscFunctionReturn(0); 1854 } 1855 1856 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1857 { 1858 PetscErrorCode ierr; 1859 1860 PetscFunctionBegin; 1861 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1862 PetscFunctionReturn(0); 1863 } 1864 1865 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1866 { 1867 PetscErrorCode ierr; 1868 1869 PetscFunctionBegin; 1870 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1871 PetscFunctionReturn(0); 1872 } 1873 1874 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1875 { 1876 PC_IS *pcis = (PC_IS*)pc->data; 1877 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1878 PCBDDCBenignMatMult_ctx ctx; 1879 PetscErrorCode ierr; 1880 1881 PetscFunctionBegin; 1882 if (!restore) { 1883 Mat A_IB,A_BI; 1884 PetscScalar *work; 1885 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1886 1887 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1888 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1889 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1890 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1891 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1892 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1893 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1894 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1895 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1896 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1897 ctx->apply_left = PETSC_TRUE; 1898 ctx->apply_right = PETSC_FALSE; 1899 ctx->apply_p0 = PETSC_FALSE; 1900 ctx->benign_n = pcbddc->benign_n; 1901 if (reuse) { 1902 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1903 ctx->free = PETSC_FALSE; 1904 } else { /* TODO: could be optimized for successive solves */ 1905 ISLocalToGlobalMapping N_to_D; 1906 PetscInt i; 1907 1908 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1909 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1910 for (i=0;i<pcbddc->benign_n;i++) { 1911 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1912 } 1913 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1914 ctx->free = PETSC_TRUE; 1915 } 1916 ctx->A = pcis->A_IB; 1917 ctx->work = work; 1918 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1919 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1920 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1921 pcis->A_IB = A_IB; 1922 1923 /* A_BI as A_IB^T */ 1924 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1925 pcbddc->benign_original_mat = pcis->A_BI; 1926 pcis->A_BI = A_BI; 1927 } else { 1928 if (!pcbddc->benign_original_mat) { 1929 PetscFunctionReturn(0); 1930 } 1931 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1932 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1933 pcis->A_IB = ctx->A; 1934 ctx->A = NULL; 1935 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1936 pcis->A_BI = pcbddc->benign_original_mat; 1937 pcbddc->benign_original_mat = NULL; 1938 if (ctx->free) { 1939 PetscInt i; 1940 for (i=0;i<ctx->benign_n;i++) { 1941 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1942 } 1943 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1944 } 1945 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1946 ierr = PetscFree(ctx);CHKERRQ(ierr); 1947 } 1948 PetscFunctionReturn(0); 1949 } 1950 1951 /* used just in bddc debug mode */ 1952 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1953 { 1954 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1955 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1956 Mat An; 1957 PetscErrorCode ierr; 1958 1959 PetscFunctionBegin; 1960 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1961 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1962 if (is1) { 1963 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1964 ierr = MatDestroy(&An);CHKERRQ(ierr); 1965 } else { 1966 *B = An; 1967 } 1968 PetscFunctionReturn(0); 1969 } 1970 1971 /* TODO: add reuse flag */ 1972 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1973 { 1974 Mat Bt; 1975 PetscScalar *a,*bdata; 1976 const PetscInt *ii,*ij; 1977 PetscInt m,n,i,nnz,*bii,*bij; 1978 PetscBool flg_row; 1979 PetscErrorCode ierr; 1980 1981 PetscFunctionBegin; 1982 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1983 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1984 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1985 nnz = n; 1986 for (i=0;i<ii[n];i++) { 1987 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1988 } 1989 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1990 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1991 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1992 nnz = 0; 1993 bii[0] = 0; 1994 for (i=0;i<n;i++) { 1995 PetscInt j; 1996 for (j=ii[i];j<ii[i+1];j++) { 1997 PetscScalar entry = a[j]; 1998 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1999 bij[nnz] = ij[j]; 2000 bdata[nnz] = entry; 2001 nnz++; 2002 } 2003 } 2004 bii[i+1] = nnz; 2005 } 2006 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2007 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2008 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2009 { 2010 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2011 b->free_a = PETSC_TRUE; 2012 b->free_ij = PETSC_TRUE; 2013 } 2014 *B = Bt; 2015 PetscFunctionReturn(0); 2016 } 2017 2018 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 2019 { 2020 Mat B; 2021 IS is_dummy,*cc_n; 2022 ISLocalToGlobalMapping l2gmap_dummy; 2023 PCBDDCGraph graph; 2024 PetscInt i,n; 2025 PetscInt *xadj,*adjncy; 2026 PetscInt *xadj_filtered,*adjncy_filtered; 2027 PetscBool flg_row,isseqaij; 2028 PetscErrorCode ierr; 2029 2030 PetscFunctionBegin; 2031 if (!A->rmap->N || !A->cmap->N) { 2032 *ncc = 0; 2033 *cc = NULL; 2034 PetscFunctionReturn(0); 2035 } 2036 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2037 if (!isseqaij && filter) { 2038 PetscBool isseqdense; 2039 2040 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2041 if (!isseqdense) { 2042 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2043 } else { /* TODO: rectangular case and LDA */ 2044 PetscScalar *array; 2045 PetscReal chop=1.e-6; 2046 2047 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2048 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2049 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2050 for (i=0;i<n;i++) { 2051 PetscInt j; 2052 for (j=i+1;j<n;j++) { 2053 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2054 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2055 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2056 } 2057 } 2058 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2059 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2060 } 2061 } else { 2062 B = A; 2063 } 2064 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2065 2066 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2067 if (filter) { 2068 PetscScalar *data; 2069 PetscInt j,cum; 2070 2071 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2072 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2073 cum = 0; 2074 for (i=0;i<n;i++) { 2075 PetscInt t; 2076 2077 for (j=xadj[i];j<xadj[i+1];j++) { 2078 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2079 continue; 2080 } 2081 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2082 } 2083 t = xadj_filtered[i]; 2084 xadj_filtered[i] = cum; 2085 cum += t; 2086 } 2087 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2088 } else { 2089 xadj_filtered = NULL; 2090 adjncy_filtered = NULL; 2091 } 2092 2093 /* compute local connected components using PCBDDCGraph */ 2094 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2095 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2096 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2097 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2098 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2099 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2100 if (xadj_filtered) { 2101 graph->xadj = xadj_filtered; 2102 graph->adjncy = adjncy_filtered; 2103 } else { 2104 graph->xadj = xadj; 2105 graph->adjncy = adjncy; 2106 } 2107 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2108 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2109 /* partial clean up */ 2110 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2111 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2112 if (A != B) { 2113 ierr = MatDestroy(&B);CHKERRQ(ierr); 2114 } 2115 2116 /* get back data */ 2117 if (ncc) *ncc = graph->ncc; 2118 if (cc) { 2119 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2120 for (i=0;i<graph->ncc;i++) { 2121 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2122 } 2123 *cc = cc_n; 2124 } 2125 /* clean up graph */ 2126 graph->xadj = 0; 2127 graph->adjncy = 0; 2128 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2129 PetscFunctionReturn(0); 2130 } 2131 2132 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2133 { 2134 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2135 PC_IS* pcis = (PC_IS*)(pc->data); 2136 IS dirIS = NULL; 2137 PetscInt i; 2138 PetscErrorCode ierr; 2139 2140 PetscFunctionBegin; 2141 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2142 if (zerodiag) { 2143 Mat A; 2144 Vec vec3_N; 2145 PetscScalar *vals; 2146 const PetscInt *idxs; 2147 PetscInt nz,*count; 2148 2149 /* p0 */ 2150 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2151 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2152 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2153 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2154 for (i=0;i<nz;i++) vals[i] = 1.; 2155 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2156 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2157 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2158 /* v_I */ 2159 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2160 for (i=0;i<nz;i++) vals[i] = 0.; 2161 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2162 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2163 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2164 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2165 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2166 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2167 if (dirIS) { 2168 PetscInt n; 2169 2170 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2171 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2172 for (i=0;i<n;i++) vals[i] = 0.; 2173 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2174 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2175 } 2176 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2177 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2178 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2179 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2180 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2181 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2182 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2183 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2184 ierr = PetscFree(vals);CHKERRQ(ierr); 2185 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2186 2187 /* there should not be any pressure dofs lying on the interface */ 2188 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2189 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2190 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2191 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2192 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2193 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2194 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2195 ierr = PetscFree(count);CHKERRQ(ierr); 2196 } 2197 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2198 2199 /* check PCBDDCBenignGetOrSetP0 */ 2200 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2201 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2202 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2203 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2204 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2205 for (i=0;i<pcbddc->benign_n;i++) { 2206 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2207 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 2208 } 2209 PetscFunctionReturn(0); 2210 } 2211 2212 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2213 { 2214 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2215 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2216 PetscInt nz,n; 2217 PetscInt *interior_dofs,n_interior_dofs,nneu; 2218 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2223 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2224 for (n=0;n<pcbddc->benign_n;n++) { 2225 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2226 } 2227 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2228 pcbddc->benign_n = 0; 2229 2230 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2231 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2232 Checks if all the pressure dofs in each subdomain have a zero diagonal 2233 If not, a change of basis on pressures is not needed 2234 since the local Schur complements are already SPD 2235 */ 2236 has_null_pressures = PETSC_TRUE; 2237 have_null = PETSC_TRUE; 2238 if (pcbddc->n_ISForDofsLocal) { 2239 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2240 2241 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2242 ierr = PetscOptionsInt ("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2243 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2244 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2245 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2246 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2247 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2248 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2249 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2250 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2251 if (!sorted) { 2252 ierr = ISSort(pressures);CHKERRQ(ierr); 2253 } 2254 } else { 2255 pressures = NULL; 2256 } 2257 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2258 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2259 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2260 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2261 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2262 if (!sorted) { 2263 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2264 } 2265 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2266 zerodiag_save = zerodiag; 2267 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2268 if (!nz) { 2269 if (n) have_null = PETSC_FALSE; 2270 has_null_pressures = PETSC_FALSE; 2271 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2272 } 2273 recompute_zerodiag = PETSC_FALSE; 2274 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2275 zerodiag_subs = NULL; 2276 pcbddc->benign_n = 0; 2277 n_interior_dofs = 0; 2278 interior_dofs = NULL; 2279 nneu = 0; 2280 if (pcbddc->NeumannBoundariesLocal) { 2281 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2282 } 2283 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2284 if (checkb) { /* need to compute interior nodes */ 2285 PetscInt n,i,j; 2286 PetscInt n_neigh,*neigh,*n_shared,**shared; 2287 PetscInt *iwork; 2288 2289 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2290 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2291 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2292 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2293 for (i=1;i<n_neigh;i++) 2294 for (j=0;j<n_shared[i];j++) 2295 iwork[shared[i][j]] += 1; 2296 for (i=0;i<n;i++) 2297 if (!iwork[i]) 2298 interior_dofs[n_interior_dofs++] = i; 2299 ierr = PetscFree(iwork);CHKERRQ(ierr); 2300 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2301 } 2302 if (has_null_pressures) { 2303 IS *subs; 2304 PetscInt nsubs,i,j,nl; 2305 const PetscInt *idxs; 2306 PetscScalar *array; 2307 Vec *work; 2308 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2309 2310 subs = pcbddc->local_subs; 2311 nsubs = pcbddc->n_local_subs; 2312 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2313 if (checkb) { 2314 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2315 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2316 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2317 /* work[0] = 1_p */ 2318 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2319 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2320 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2321 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2322 /* work[0] = 1_v */ 2323 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2324 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2325 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2326 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2327 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2328 } 2329 if (nsubs > 1) { 2330 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2331 for (i=0;i<nsubs;i++) { 2332 ISLocalToGlobalMapping l2g; 2333 IS t_zerodiag_subs; 2334 PetscInt nl; 2335 2336 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2337 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2338 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2339 if (nl) { 2340 PetscBool valid = PETSC_TRUE; 2341 2342 if (checkb) { 2343 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2344 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2345 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2346 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2347 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2348 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2349 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2350 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2351 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2352 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2353 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2354 for (j=0;j<n_interior_dofs;j++) { 2355 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2356 valid = PETSC_FALSE; 2357 break; 2358 } 2359 } 2360 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2361 } 2362 if (valid && nneu) { 2363 const PetscInt *idxs; 2364 PetscInt nzb; 2365 2366 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2367 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2368 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2369 if (nzb) valid = PETSC_FALSE; 2370 } 2371 if (valid && pressures) { 2372 IS t_pressure_subs; 2373 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2374 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2375 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2376 } 2377 if (valid) { 2378 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2379 pcbddc->benign_n++; 2380 } else { 2381 recompute_zerodiag = PETSC_TRUE; 2382 } 2383 } 2384 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2385 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2386 } 2387 } else { /* there's just one subdomain (or zero if they have not been detected */ 2388 PetscBool valid = PETSC_TRUE; 2389 2390 if (nneu) valid = PETSC_FALSE; 2391 if (valid && pressures) { 2392 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2393 } 2394 if (valid && checkb) { 2395 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2396 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2397 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2398 for (j=0;j<n_interior_dofs;j++) { 2399 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2400 valid = PETSC_FALSE; 2401 break; 2402 } 2403 } 2404 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2405 } 2406 if (valid) { 2407 pcbddc->benign_n = 1; 2408 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2409 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2410 zerodiag_subs[0] = zerodiag; 2411 } 2412 } 2413 if (checkb) { 2414 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2415 } 2416 } 2417 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2418 2419 if (!pcbddc->benign_n) { 2420 PetscInt n; 2421 2422 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2423 recompute_zerodiag = PETSC_FALSE; 2424 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2425 if (n) { 2426 has_null_pressures = PETSC_FALSE; 2427 have_null = PETSC_FALSE; 2428 } 2429 } 2430 2431 /* final check for null pressures */ 2432 if (zerodiag && pressures) { 2433 PetscInt nz,np; 2434 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2435 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2436 if (nz != np) have_null = PETSC_FALSE; 2437 } 2438 2439 if (recompute_zerodiag) { 2440 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2441 if (pcbddc->benign_n == 1) { 2442 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2443 zerodiag = zerodiag_subs[0]; 2444 } else { 2445 PetscInt i,nzn,*new_idxs; 2446 2447 nzn = 0; 2448 for (i=0;i<pcbddc->benign_n;i++) { 2449 PetscInt ns; 2450 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2451 nzn += ns; 2452 } 2453 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2454 nzn = 0; 2455 for (i=0;i<pcbddc->benign_n;i++) { 2456 PetscInt ns,*idxs; 2457 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2458 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2459 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2460 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2461 nzn += ns; 2462 } 2463 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2464 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2465 } 2466 have_null = PETSC_FALSE; 2467 } 2468 2469 /* Prepare matrix to compute no-net-flux */ 2470 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2471 Mat A,loc_divudotp; 2472 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2473 IS row,col,isused = NULL; 2474 PetscInt M,N,n,st,n_isused; 2475 2476 if (pressures) { 2477 isused = pressures; 2478 } else { 2479 isused = zerodiag_save; 2480 } 2481 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2482 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2483 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2484 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2485 n_isused = 0; 2486 if (isused) { 2487 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2488 } 2489 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2490 st = st-n_isused; 2491 if (n) { 2492 const PetscInt *gidxs; 2493 2494 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2495 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2496 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2497 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2498 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2499 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2500 } else { 2501 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2502 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2503 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2504 } 2505 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2506 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2507 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2508 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2509 ierr = ISDestroy(&row);CHKERRQ(ierr); 2510 ierr = ISDestroy(&col);CHKERRQ(ierr); 2511 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2512 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2513 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2514 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2515 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2516 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2517 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2518 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2519 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2520 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2521 } 2522 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2523 2524 /* change of basis and p0 dofs */ 2525 if (has_null_pressures) { 2526 IS zerodiagc; 2527 const PetscInt *idxs,*idxsc; 2528 PetscInt i,s,*nnz; 2529 2530 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2531 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2532 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2533 /* local change of basis for pressures */ 2534 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2535 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2536 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2537 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2538 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2539 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2540 for (i=0;i<pcbddc->benign_n;i++) { 2541 PetscInt nzs,j; 2542 2543 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2544 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2545 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2546 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2547 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2548 } 2549 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2550 ierr = PetscFree(nnz);CHKERRQ(ierr); 2551 /* set identity on velocities */ 2552 for (i=0;i<n-nz;i++) { 2553 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2554 } 2555 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2556 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2557 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2558 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2559 /* set change on pressures */ 2560 for (s=0;s<pcbddc->benign_n;s++) { 2561 PetscScalar *array; 2562 PetscInt nzs; 2563 2564 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2565 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2566 for (i=0;i<nzs-1;i++) { 2567 PetscScalar vals[2]; 2568 PetscInt cols[2]; 2569 2570 cols[0] = idxs[i]; 2571 cols[1] = idxs[nzs-1]; 2572 vals[0] = 1.; 2573 vals[1] = 1.; 2574 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2575 } 2576 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2577 for (i=0;i<nzs-1;i++) array[i] = -1.; 2578 array[nzs-1] = 1.; 2579 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2580 /* store local idxs for p0 */ 2581 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2582 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2583 ierr = PetscFree(array);CHKERRQ(ierr); 2584 } 2585 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2586 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2587 /* project if needed */ 2588 if (pcbddc->benign_change_explicit) { 2589 Mat M; 2590 2591 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2592 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2593 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2594 ierr = MatDestroy(&M);CHKERRQ(ierr); 2595 } 2596 /* store global idxs for p0 */ 2597 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2598 } 2599 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2600 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2601 2602 /* determines if the coarse solver will be singular or not */ 2603 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2604 /* determines if the problem has subdomains with 0 pressure block */ 2605 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2606 *zerodiaglocal = zerodiag; 2607 PetscFunctionReturn(0); 2608 } 2609 2610 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2611 { 2612 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2613 PetscScalar *array; 2614 PetscErrorCode ierr; 2615 2616 PetscFunctionBegin; 2617 if (!pcbddc->benign_sf) { 2618 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2619 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2620 } 2621 if (get) { 2622 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2623 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2624 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2625 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2626 } else { 2627 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2628 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2629 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2630 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2631 } 2632 PetscFunctionReturn(0); 2633 } 2634 2635 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2636 { 2637 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2638 PetscErrorCode ierr; 2639 2640 PetscFunctionBegin; 2641 /* TODO: add error checking 2642 - avoid nested pop (or push) calls. 2643 - cannot push before pop. 2644 - cannot call this if pcbddc->local_mat is NULL 2645 */ 2646 if (!pcbddc->benign_n) { 2647 PetscFunctionReturn(0); 2648 } 2649 if (pop) { 2650 if (pcbddc->benign_change_explicit) { 2651 IS is_p0; 2652 MatReuse reuse; 2653 2654 /* extract B_0 */ 2655 reuse = MAT_INITIAL_MATRIX; 2656 if (pcbddc->benign_B0) { 2657 reuse = MAT_REUSE_MATRIX; 2658 } 2659 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2660 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2661 /* remove rows and cols from local problem */ 2662 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2663 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2664 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2665 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2666 } else { 2667 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2668 PetscScalar *vals; 2669 PetscInt i,n,*idxs_ins; 2670 2671 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2672 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2673 if (!pcbddc->benign_B0) { 2674 PetscInt *nnz; 2675 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2676 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2677 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2678 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2679 for (i=0;i<pcbddc->benign_n;i++) { 2680 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2681 nnz[i] = n - nnz[i]; 2682 } 2683 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2684 ierr = PetscFree(nnz);CHKERRQ(ierr); 2685 } 2686 2687 for (i=0;i<pcbddc->benign_n;i++) { 2688 PetscScalar *array; 2689 PetscInt *idxs,j,nz,cum; 2690 2691 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2692 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2693 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2694 for (j=0;j<nz;j++) vals[j] = 1.; 2695 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2696 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2697 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2698 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2699 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2700 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2701 cum = 0; 2702 for (j=0;j<n;j++) { 2703 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2704 vals[cum] = array[j]; 2705 idxs_ins[cum] = j; 2706 cum++; 2707 } 2708 } 2709 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2710 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2711 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2712 } 2713 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2714 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2715 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2716 } 2717 } else { /* push */ 2718 if (pcbddc->benign_change_explicit) { 2719 PetscInt i; 2720 2721 for (i=0;i<pcbddc->benign_n;i++) { 2722 PetscScalar *B0_vals; 2723 PetscInt *B0_cols,B0_ncol; 2724 2725 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2726 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2727 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2728 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2729 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2730 } 2731 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2732 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2733 } else { 2734 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2735 } 2736 } 2737 PetscFunctionReturn(0); 2738 } 2739 2740 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2741 { 2742 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2743 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2744 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2745 PetscBLASInt *B_iwork,*B_ifail; 2746 PetscScalar *work,lwork; 2747 PetscScalar *St,*S,*eigv; 2748 PetscScalar *Sarray,*Starray; 2749 PetscReal *eigs,thresh; 2750 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2751 PetscBool allocated_S_St; 2752 #if defined(PETSC_USE_COMPLEX) 2753 PetscReal *rwork; 2754 #endif 2755 PetscErrorCode ierr; 2756 2757 PetscFunctionBegin; 2758 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2759 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2760 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\nRerun with -sub_schurs_hermitian 1 -sub_schurs_posdef 1 if the problem is SPD",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2761 2762 if (pcbddc->dbg_flag) { 2763 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2764 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2765 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2766 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2767 } 2768 2769 if (pcbddc->dbg_flag) { 2770 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2771 } 2772 2773 /* max size of subsets */ 2774 mss = 0; 2775 for (i=0;i<sub_schurs->n_subs;i++) { 2776 PetscInt subset_size; 2777 2778 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2779 mss = PetscMax(mss,subset_size); 2780 } 2781 2782 /* min/max and threshold */ 2783 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2784 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2785 nmax = PetscMax(nmin,nmax); 2786 allocated_S_St = PETSC_FALSE; 2787 if (nmin) { 2788 allocated_S_St = PETSC_TRUE; 2789 } 2790 2791 /* allocate lapack workspace */ 2792 cum = cum2 = 0; 2793 maxneigs = 0; 2794 for (i=0;i<sub_schurs->n_subs;i++) { 2795 PetscInt n,subset_size; 2796 2797 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2798 n = PetscMin(subset_size,nmax); 2799 cum += subset_size; 2800 cum2 += subset_size*n; 2801 maxneigs = PetscMax(maxneigs,n); 2802 } 2803 if (mss) { 2804 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2805 PetscBLASInt B_itype = 1; 2806 PetscBLASInt B_N = mss; 2807 PetscReal zero = 0.0; 2808 PetscReal eps = 0.0; /* dlamch? */ 2809 2810 B_lwork = -1; 2811 S = NULL; 2812 St = NULL; 2813 eigs = NULL; 2814 eigv = NULL; 2815 B_iwork = NULL; 2816 B_ifail = NULL; 2817 #if defined(PETSC_USE_COMPLEX) 2818 rwork = NULL; 2819 #endif 2820 thresh = 1.0; 2821 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2822 #if defined(PETSC_USE_COMPLEX) 2823 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2824 #else 2825 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2826 #endif 2827 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2828 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2829 } else { 2830 /* TODO */ 2831 } 2832 } else { 2833 lwork = 0; 2834 } 2835 2836 nv = 0; 2837 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2838 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2839 } 2840 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2841 if (allocated_S_St) { 2842 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2843 } 2844 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2845 #if defined(PETSC_USE_COMPLEX) 2846 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2847 #endif 2848 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2849 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2850 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2851 nv+cum,&pcbddc->adaptive_constraints_idxs, 2852 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2853 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2854 2855 maxneigs = 0; 2856 cum = cumarray = 0; 2857 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2858 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2859 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2860 const PetscInt *idxs; 2861 2862 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2863 for (cum=0;cum<nv;cum++) { 2864 pcbddc->adaptive_constraints_n[cum] = 1; 2865 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2866 pcbddc->adaptive_constraints_data[cum] = 1.0; 2867 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2868 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2869 } 2870 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2871 } 2872 2873 if (mss) { /* multilevel */ 2874 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2875 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2876 } 2877 2878 thresh = pcbddc->adaptive_threshold; 2879 for (i=0;i<sub_schurs->n_subs;i++) { 2880 const PetscInt *idxs; 2881 PetscReal upper,lower; 2882 PetscInt j,subset_size,eigs_start = 0; 2883 PetscBLASInt B_N; 2884 PetscBool same_data = PETSC_FALSE; 2885 2886 if (pcbddc->use_deluxe_scaling) { 2887 upper = PETSC_MAX_REAL; 2888 lower = thresh; 2889 } else { 2890 upper = 1./thresh; 2891 lower = 0.; 2892 } 2893 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2894 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2895 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2896 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2897 if (sub_schurs->is_hermitian) { 2898 PetscInt j,k; 2899 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2900 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2901 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2902 } 2903 for (j=0;j<subset_size;j++) { 2904 for (k=j;k<subset_size;k++) { 2905 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2906 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2907 } 2908 } 2909 } else { 2910 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2911 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2912 } 2913 } else { 2914 S = Sarray + cumarray; 2915 St = Starray + cumarray; 2916 } 2917 /* see if we can save some work */ 2918 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2919 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2920 } 2921 2922 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2923 B_neigs = 0; 2924 } else { 2925 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2926 PetscBLASInt B_itype = 1; 2927 PetscBLASInt B_IL, B_IU; 2928 PetscReal eps = -1.0; /* dlamch? */ 2929 PetscInt nmin_s; 2930 PetscBool compute_range = PETSC_FALSE; 2931 2932 if (pcbddc->dbg_flag) { 2933 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 2934 } 2935 2936 compute_range = PETSC_FALSE; 2937 if (thresh > 1.+PETSC_SMALL && !same_data) { 2938 compute_range = PETSC_TRUE; 2939 } 2940 2941 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2942 if (compute_range) { 2943 2944 /* ask for eigenvalues larger than thresh */ 2945 #if defined(PETSC_USE_COMPLEX) 2946 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2947 #else 2948 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2949 #endif 2950 } else if (!same_data) { 2951 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2952 B_IL = 1; 2953 #if defined(PETSC_USE_COMPLEX) 2954 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2955 #else 2956 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2957 #endif 2958 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 2959 PetscInt k; 2960 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2961 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2962 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2963 nmin = nmax; 2964 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2965 for (k=0;k<nmax;k++) { 2966 eigs[k] = 1./PETSC_SMALL; 2967 eigv[k*(subset_size+1)] = 1.0; 2968 } 2969 } 2970 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2971 if (B_ierr) { 2972 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2973 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2974 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2975 } 2976 2977 if (B_neigs > nmax) { 2978 if (pcbddc->dbg_flag) { 2979 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2980 } 2981 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2982 B_neigs = nmax; 2983 } 2984 2985 nmin_s = PetscMin(nmin,B_N); 2986 if (B_neigs < nmin_s) { 2987 PetscBLASInt B_neigs2; 2988 2989 if (pcbddc->use_deluxe_scaling) { 2990 B_IL = B_N - nmin_s + 1; 2991 B_IU = B_N - B_neigs; 2992 } else { 2993 B_IL = B_neigs + 1; 2994 B_IU = nmin_s; 2995 } 2996 if (pcbddc->dbg_flag) { 2997 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2998 } 2999 if (sub_schurs->is_hermitian) { 3000 PetscInt j,k; 3001 for (j=0;j<subset_size;j++) { 3002 for (k=j;k<subset_size;k++) { 3003 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3004 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3005 } 3006 } 3007 } else { 3008 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3009 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3010 } 3011 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3012 #if defined(PETSC_USE_COMPLEX) 3013 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3014 #else 3015 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3016 #endif 3017 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3018 B_neigs += B_neigs2; 3019 } 3020 if (B_ierr) { 3021 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3022 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3023 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3024 } 3025 if (pcbddc->dbg_flag) { 3026 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3027 for (j=0;j<B_neigs;j++) { 3028 if (eigs[j] == 0.0) { 3029 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3030 } else { 3031 if (pcbddc->use_deluxe_scaling) { 3032 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3033 } else { 3034 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3035 } 3036 } 3037 } 3038 } 3039 } else { 3040 /* TODO */ 3041 } 3042 } 3043 /* change the basis back to the original one */ 3044 if (sub_schurs->change) { 3045 Mat change,phi,phit; 3046 3047 if (pcbddc->dbg_flag > 1) { 3048 PetscInt ii; 3049 for (ii=0;ii<B_neigs;ii++) { 3050 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3051 for (j=0;j<B_N;j++) { 3052 #if defined(PETSC_USE_COMPLEX) 3053 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3054 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3055 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3056 #else 3057 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3058 #endif 3059 } 3060 } 3061 } 3062 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3063 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3064 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3065 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3066 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3067 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3068 } 3069 maxneigs = PetscMax(B_neigs,maxneigs); 3070 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3071 if (B_neigs) { 3072 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3073 3074 if (pcbddc->dbg_flag > 1) { 3075 PetscInt ii; 3076 for (ii=0;ii<B_neigs;ii++) { 3077 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3078 for (j=0;j<B_N;j++) { 3079 #if defined(PETSC_USE_COMPLEX) 3080 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3081 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3082 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3083 #else 3084 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3085 #endif 3086 } 3087 } 3088 } 3089 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3090 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3091 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3092 cum++; 3093 } 3094 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3095 /* shift for next computation */ 3096 cumarray += subset_size*subset_size; 3097 } 3098 if (pcbddc->dbg_flag) { 3099 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3100 } 3101 3102 if (mss) { 3103 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3104 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3105 /* destroy matrices (junk) */ 3106 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3107 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3108 } 3109 if (allocated_S_St) { 3110 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3111 } 3112 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3113 #if defined(PETSC_USE_COMPLEX) 3114 ierr = PetscFree(rwork);CHKERRQ(ierr); 3115 #endif 3116 if (pcbddc->dbg_flag) { 3117 PetscInt maxneigs_r; 3118 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3119 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3120 } 3121 PetscFunctionReturn(0); 3122 } 3123 3124 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3125 { 3126 PetscScalar *coarse_submat_vals; 3127 PetscErrorCode ierr; 3128 3129 PetscFunctionBegin; 3130 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3131 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3132 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3133 3134 /* Setup local neumann solver ksp_R */ 3135 /* PCBDDCSetUpLocalScatters should be called first! */ 3136 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3137 3138 /* 3139 Setup local correction and local part of coarse basis. 3140 Gives back the dense local part of the coarse matrix in column major ordering 3141 */ 3142 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3143 3144 /* Compute total number of coarse nodes and setup coarse solver */ 3145 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3146 3147 /* free */ 3148 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3149 PetscFunctionReturn(0); 3150 } 3151 3152 PetscErrorCode PCBDDCResetCustomization(PC pc) 3153 { 3154 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3155 PetscErrorCode ierr; 3156 3157 PetscFunctionBegin; 3158 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3159 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3160 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3161 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3162 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3163 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3164 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3165 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3166 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3167 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3168 PetscFunctionReturn(0); 3169 } 3170 3171 PetscErrorCode PCBDDCResetTopography(PC pc) 3172 { 3173 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3174 PetscInt i; 3175 PetscErrorCode ierr; 3176 3177 PetscFunctionBegin; 3178 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3179 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3180 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3181 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3182 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3183 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3184 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3185 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3186 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3187 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3188 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3189 for (i=0;i<pcbddc->n_local_subs;i++) { 3190 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3191 } 3192 pcbddc->n_local_subs = 0; 3193 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3194 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3195 pcbddc->graphanalyzed = PETSC_FALSE; 3196 pcbddc->recompute_topography = PETSC_TRUE; 3197 PetscFunctionReturn(0); 3198 } 3199 3200 PetscErrorCode PCBDDCResetSolvers(PC pc) 3201 { 3202 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3203 PetscErrorCode ierr; 3204 3205 PetscFunctionBegin; 3206 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3207 if (pcbddc->coarse_phi_B) { 3208 PetscScalar *array; 3209 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3210 ierr = PetscFree(array);CHKERRQ(ierr); 3211 } 3212 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3213 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3214 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3215 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3216 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3217 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3218 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3219 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3220 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3221 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3222 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3223 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3224 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3225 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3226 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3227 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3228 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3229 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3230 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3231 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3232 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3233 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3234 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3235 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3236 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3237 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3238 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3239 if (pcbddc->benign_zerodiag_subs) { 3240 PetscInt i; 3241 for (i=0;i<pcbddc->benign_n;i++) { 3242 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3243 } 3244 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3245 } 3246 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3247 PetscFunctionReturn(0); 3248 } 3249 3250 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3251 { 3252 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3253 PC_IS *pcis = (PC_IS*)pc->data; 3254 VecType impVecType; 3255 PetscInt n_constraints,n_R,old_size; 3256 PetscErrorCode ierr; 3257 3258 PetscFunctionBegin; 3259 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3260 n_R = pcis->n - pcbddc->n_vertices; 3261 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3262 /* local work vectors (try to avoid unneeded work)*/ 3263 /* R nodes */ 3264 old_size = -1; 3265 if (pcbddc->vec1_R) { 3266 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3267 } 3268 if (n_R != old_size) { 3269 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3270 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3271 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3272 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3273 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3274 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3275 } 3276 /* local primal dofs */ 3277 old_size = -1; 3278 if (pcbddc->vec1_P) { 3279 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3280 } 3281 if (pcbddc->local_primal_size != old_size) { 3282 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3283 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3284 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3285 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3286 } 3287 /* local explicit constraints */ 3288 old_size = -1; 3289 if (pcbddc->vec1_C) { 3290 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3291 } 3292 if (n_constraints && n_constraints != old_size) { 3293 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3294 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3295 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3296 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3297 } 3298 PetscFunctionReturn(0); 3299 } 3300 3301 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3302 { 3303 PetscErrorCode ierr; 3304 /* pointers to pcis and pcbddc */ 3305 PC_IS* pcis = (PC_IS*)pc->data; 3306 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3307 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3308 /* submatrices of local problem */ 3309 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3310 /* submatrices of local coarse problem */ 3311 Mat S_VV,S_CV,S_VC,S_CC; 3312 /* working matrices */ 3313 Mat C_CR; 3314 /* additional working stuff */ 3315 PC pc_R; 3316 Mat F,Brhs = NULL; 3317 Vec dummy_vec; 3318 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3319 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3320 PetscScalar *work; 3321 PetscInt *idx_V_B; 3322 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3323 PetscInt i,n_R,n_D,n_B; 3324 3325 /* some shortcuts to scalars */ 3326 PetscScalar one=1.0,m_one=-1.0; 3327 3328 PetscFunctionBegin; 3329 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3330 3331 /* Set Non-overlapping dimensions */ 3332 n_vertices = pcbddc->n_vertices; 3333 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3334 n_B = pcis->n_B; 3335 n_D = pcis->n - n_B; 3336 n_R = pcis->n - n_vertices; 3337 3338 /* vertices in boundary numbering */ 3339 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3340 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3341 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3342 3343 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3344 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3345 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3346 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3347 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3348 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3349 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3350 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3351 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3352 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3353 3354 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3355 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3356 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3357 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3358 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3359 lda_rhs = n_R; 3360 need_benign_correction = PETSC_FALSE; 3361 if (isLU || isILU || isCHOL) { 3362 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3363 } else if (sub_schurs && sub_schurs->reuse_solver) { 3364 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3365 MatFactorType type; 3366 3367 F = reuse_solver->F; 3368 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3369 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3370 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3371 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3372 } else { 3373 F = NULL; 3374 } 3375 3376 /* determine if we can use a sparse right-hand side */ 3377 sparserhs = PETSC_FALSE; 3378 if (F) { 3379 const MatSolverPackage solver; 3380 3381 ierr = MatFactorGetSolverPackage(F,&solver);CHKERRQ(ierr); 3382 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3383 } 3384 3385 /* allocate workspace */ 3386 n = 0; 3387 if (n_constraints) { 3388 n += lda_rhs*n_constraints; 3389 } 3390 if (n_vertices) { 3391 n = PetscMax(2*lda_rhs*n_vertices,n); 3392 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3393 } 3394 if (!pcbddc->symmetric_primal) { 3395 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3396 } 3397 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3398 3399 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3400 dummy_vec = NULL; 3401 if (need_benign_correction && lda_rhs != n_R && F) { 3402 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3403 } 3404 3405 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3406 if (n_constraints) { 3407 Mat M1,M2,M3,C_B; 3408 IS is_aux; 3409 PetscScalar *array,*array2; 3410 3411 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3412 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3413 3414 /* Extract constraints on R nodes: C_{CR} */ 3415 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3416 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3417 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3418 3419 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3420 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3421 if (!sparserhs) { 3422 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3423 for (i=0;i<n_constraints;i++) { 3424 const PetscScalar *row_cmat_values; 3425 const PetscInt *row_cmat_indices; 3426 PetscInt size_of_constraint,j; 3427 3428 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3429 for (j=0;j<size_of_constraint;j++) { 3430 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3431 } 3432 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3433 } 3434 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3435 } else { 3436 Mat tC_CR; 3437 3438 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3439 if (lda_rhs != n_R) { 3440 PetscScalar *aa; 3441 PetscInt r,*ii,*jj; 3442 PetscBool done; 3443 3444 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3445 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");CHKERRQ(ierr); 3446 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3447 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3448 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3449 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");CHKERRQ(ierr); 3450 } else { 3451 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3452 tC_CR = C_CR; 3453 } 3454 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3455 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3456 } 3457 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3458 if (F) { 3459 if (need_benign_correction) { 3460 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3461 3462 /* rhs is already zero on interior dofs, no need to change the rhs */ 3463 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3464 } 3465 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3466 if (need_benign_correction) { 3467 PetscScalar *marr; 3468 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3469 3470 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3471 if (lda_rhs != n_R) { 3472 for (i=0;i<n_constraints;i++) { 3473 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3474 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3475 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3476 } 3477 } else { 3478 for (i=0;i<n_constraints;i++) { 3479 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3480 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3481 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3482 } 3483 } 3484 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3485 } 3486 } else { 3487 PetscScalar *marr; 3488 3489 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3490 for (i=0;i<n_constraints;i++) { 3491 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3492 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3493 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3494 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3495 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3496 } 3497 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3498 } 3499 if (sparserhs) { 3500 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3501 } 3502 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3503 if (!pcbddc->switch_static) { 3504 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3505 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3506 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3507 for (i=0;i<n_constraints;i++) { 3508 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3509 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3510 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3511 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3512 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3513 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3514 } 3515 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3516 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3517 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3518 } else { 3519 if (lda_rhs != n_R) { 3520 IS dummy; 3521 3522 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3523 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3524 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3525 } else { 3526 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3527 pcbddc->local_auxmat2 = local_auxmat2_R; 3528 } 3529 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3530 } 3531 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3532 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3533 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3534 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3535 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3536 if (isCHOL) { 3537 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3538 } else { 3539 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3540 } 3541 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3542 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3543 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3544 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3545 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3546 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3547 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3548 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3549 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3550 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3551 } 3552 3553 /* Get submatrices from subdomain matrix */ 3554 if (n_vertices) { 3555 IS is_aux; 3556 PetscBool isseqaij; 3557 3558 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3559 IS tis; 3560 3561 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3562 ierr = ISSort(tis);CHKERRQ(ierr); 3563 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3564 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3565 } else { 3566 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3567 } 3568 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3569 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3570 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3571 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 3572 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3573 } 3574 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3575 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3576 } 3577 3578 /* Matrix of coarse basis functions (local) */ 3579 if (pcbddc->coarse_phi_B) { 3580 PetscInt on_B,on_primal,on_D=n_D; 3581 if (pcbddc->coarse_phi_D) { 3582 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3583 } 3584 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3585 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3586 PetscScalar *marray; 3587 3588 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3589 ierr = PetscFree(marray);CHKERRQ(ierr); 3590 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3591 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3592 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3593 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3594 } 3595 } 3596 3597 if (!pcbddc->coarse_phi_B) { 3598 PetscScalar *marr; 3599 3600 /* memory size */ 3601 n = n_B*pcbddc->local_primal_size; 3602 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 3603 if (!pcbddc->symmetric_primal) n *= 2; 3604 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 3605 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3606 marr += n_B*pcbddc->local_primal_size; 3607 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3608 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3609 marr += n_D*pcbddc->local_primal_size; 3610 } 3611 if (!pcbddc->symmetric_primal) { 3612 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3613 marr += n_B*pcbddc->local_primal_size; 3614 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3615 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3616 } 3617 } else { 3618 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3619 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3620 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3621 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3622 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3623 } 3624 } 3625 } 3626 3627 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3628 p0_lidx_I = NULL; 3629 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3630 const PetscInt *idxs; 3631 3632 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3633 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3634 for (i=0;i<pcbddc->benign_n;i++) { 3635 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3636 } 3637 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3638 } 3639 3640 /* vertices */ 3641 if (n_vertices) { 3642 PetscBool restoreavr = PETSC_FALSE; 3643 3644 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3645 3646 if (n_R) { 3647 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3648 PetscBLASInt B_N,B_one = 1; 3649 PetscScalar *x,*y; 3650 3651 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3652 if (need_benign_correction) { 3653 ISLocalToGlobalMapping RtoN; 3654 IS is_p0; 3655 PetscInt *idxs_p0,n; 3656 3657 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3658 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3659 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3660 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3661 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3662 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3663 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3664 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3665 } 3666 3667 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3668 if (!sparserhs || need_benign_correction) { 3669 if (lda_rhs == n_R) { 3670 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3671 } else { 3672 PetscScalar *av,*array; 3673 const PetscInt *xadj,*adjncy; 3674 PetscInt n; 3675 PetscBool flg_row; 3676 3677 array = work+lda_rhs*n_vertices; 3678 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3679 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3680 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3681 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3682 for (i=0;i<n;i++) { 3683 PetscInt j; 3684 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3685 } 3686 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3687 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3688 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3689 } 3690 if (need_benign_correction) { 3691 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3692 PetscScalar *marr; 3693 3694 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3695 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3696 3697 | 0 0 0 | (V) 3698 L = | 0 0 -1 | (P-p0) 3699 | 0 0 -1 | (p0) 3700 3701 */ 3702 for (i=0;i<reuse_solver->benign_n;i++) { 3703 const PetscScalar *vals; 3704 const PetscInt *idxs,*idxs_zero; 3705 PetscInt n,j,nz; 3706 3707 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3708 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3709 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3710 for (j=0;j<n;j++) { 3711 PetscScalar val = vals[j]; 3712 PetscInt k,col = idxs[j]; 3713 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3714 } 3715 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3716 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3717 } 3718 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3719 } 3720 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 3721 Brhs = A_RV; 3722 } else { 3723 Mat tA_RVT,A_RVT; 3724 3725 if (!pcbddc->symmetric_primal) { 3726 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 3727 } else { 3728 restoreavr = PETSC_TRUE; 3729 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 3730 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 3731 A_RVT = A_VR; 3732 } 3733 if (lda_rhs != n_R) { 3734 PetscScalar *aa; 3735 PetscInt r,*ii,*jj; 3736 PetscBool done; 3737 3738 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3739 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");CHKERRQ(ierr); 3740 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 3741 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 3742 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3743 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");CHKERRQ(ierr); 3744 } else { 3745 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 3746 tA_RVT = A_RVT; 3747 } 3748 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 3749 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 3750 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 3751 } 3752 if (F) { 3753 /* need to correct the rhs */ 3754 if (need_benign_correction) { 3755 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3756 PetscScalar *marr; 3757 3758 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 3759 if (lda_rhs != n_R) { 3760 for (i=0;i<n_vertices;i++) { 3761 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3762 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3763 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3764 } 3765 } else { 3766 for (i=0;i<n_vertices;i++) { 3767 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3768 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3769 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3770 } 3771 } 3772 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 3773 } 3774 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 3775 if (restoreavr) { 3776 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 3777 } 3778 /* need to correct the solution */ 3779 if (need_benign_correction) { 3780 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3781 PetscScalar *marr; 3782 3783 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3784 if (lda_rhs != n_R) { 3785 for (i=0;i<n_vertices;i++) { 3786 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3787 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3788 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3789 } 3790 } else { 3791 for (i=0;i<n_vertices;i++) { 3792 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3793 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3794 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3795 } 3796 } 3797 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3798 } 3799 } else { 3800 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 3801 for (i=0;i<n_vertices;i++) { 3802 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3803 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3804 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3805 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3806 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3807 } 3808 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 3809 } 3810 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3811 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3812 /* S_VV and S_CV */ 3813 if (n_constraints) { 3814 Mat B; 3815 3816 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3817 for (i=0;i<n_vertices;i++) { 3818 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3819 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3820 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3821 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3822 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3823 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3824 } 3825 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3826 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3827 ierr = MatDestroy(&B);CHKERRQ(ierr); 3828 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3829 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3830 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3831 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3832 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3833 ierr = MatDestroy(&B);CHKERRQ(ierr); 3834 } 3835 if (lda_rhs != n_R) { 3836 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3837 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3838 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3839 } 3840 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3841 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3842 if (need_benign_correction) { 3843 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3844 PetscScalar *marr,*sums; 3845 3846 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3847 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3848 for (i=0;i<reuse_solver->benign_n;i++) { 3849 const PetscScalar *vals; 3850 const PetscInt *idxs,*idxs_zero; 3851 PetscInt n,j,nz; 3852 3853 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3854 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3855 for (j=0;j<n_vertices;j++) { 3856 PetscInt k; 3857 sums[j] = 0.; 3858 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3859 } 3860 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3861 for (j=0;j<n;j++) { 3862 PetscScalar val = vals[j]; 3863 PetscInt k; 3864 for (k=0;k<n_vertices;k++) { 3865 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3866 } 3867 } 3868 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3869 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3870 } 3871 ierr = PetscFree(sums);CHKERRQ(ierr); 3872 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3873 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3874 } 3875 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3876 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3877 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3878 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3879 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3880 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3881 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3882 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3883 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3884 } else { 3885 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3886 } 3887 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3888 3889 /* coarse basis functions */ 3890 for (i=0;i<n_vertices;i++) { 3891 PetscScalar *y; 3892 3893 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3894 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3895 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3896 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3897 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3898 y[n_B*i+idx_V_B[i]] = 1.0; 3899 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3900 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3901 3902 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3903 PetscInt j; 3904 3905 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3906 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3907 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3908 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3909 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3910 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3911 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3912 } 3913 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3914 } 3915 /* if n_R == 0 the object is not destroyed */ 3916 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3917 } 3918 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3919 3920 if (n_constraints) { 3921 Mat B; 3922 3923 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3924 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3925 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3926 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3927 if (n_vertices) { 3928 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3929 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3930 } else { 3931 Mat S_VCt; 3932 3933 if (lda_rhs != n_R) { 3934 ierr = MatDestroy(&B);CHKERRQ(ierr); 3935 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3936 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3937 } 3938 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3939 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3940 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3941 } 3942 } 3943 ierr = MatDestroy(&B);CHKERRQ(ierr); 3944 /* coarse basis functions */ 3945 for (i=0;i<n_constraints;i++) { 3946 PetscScalar *y; 3947 3948 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3949 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3950 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3951 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3952 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3953 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3954 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3955 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3956 PetscInt j; 3957 3958 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3959 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3960 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3961 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3962 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3963 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3964 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3965 } 3966 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3967 } 3968 } 3969 if (n_constraints) { 3970 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3971 } 3972 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3973 3974 /* coarse matrix entries relative to B_0 */ 3975 if (pcbddc->benign_n) { 3976 Mat B0_B,B0_BPHI; 3977 IS is_dummy; 3978 PetscScalar *data; 3979 PetscInt j; 3980 3981 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3982 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3983 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3984 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3985 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3986 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3987 for (j=0;j<pcbddc->benign_n;j++) { 3988 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3989 for (i=0;i<pcbddc->local_primal_size;i++) { 3990 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3991 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3992 } 3993 } 3994 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3995 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3996 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3997 } 3998 3999 /* compute other basis functions for non-symmetric problems */ 4000 if (!pcbddc->symmetric_primal) { 4001 Mat B_V=NULL,B_C=NULL; 4002 PetscScalar *marray; 4003 4004 if (n_constraints) { 4005 Mat S_CCT,C_CRT; 4006 4007 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4008 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4009 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4010 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4011 if (n_vertices) { 4012 Mat S_VCT; 4013 4014 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4015 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4016 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4017 } 4018 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4019 } else { 4020 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4021 } 4022 if (n_vertices && n_R) { 4023 PetscScalar *av,*marray; 4024 const PetscInt *xadj,*adjncy; 4025 PetscInt n; 4026 PetscBool flg_row; 4027 4028 /* B_V = B_V - A_VR^T */ 4029 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4030 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4031 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4032 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4033 for (i=0;i<n;i++) { 4034 PetscInt j; 4035 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4036 } 4037 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4038 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4039 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4040 } 4041 4042 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4043 if (n_vertices) { 4044 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4045 for (i=0;i<n_vertices;i++) { 4046 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4047 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4048 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4049 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4050 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4051 } 4052 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4053 } 4054 if (B_C) { 4055 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4056 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4057 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4058 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4059 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4060 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4061 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4062 } 4063 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4064 } 4065 /* coarse basis functions */ 4066 for (i=0;i<pcbddc->local_primal_size;i++) { 4067 PetscScalar *y; 4068 4069 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4070 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4071 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4072 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4073 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4074 if (i<n_vertices) { 4075 y[n_B*i+idx_V_B[i]] = 1.0; 4076 } 4077 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4078 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4079 4080 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4081 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4082 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4083 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4084 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4085 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4086 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4087 } 4088 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4089 } 4090 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4091 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4092 } 4093 4094 /* free memory */ 4095 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4096 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4097 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4098 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4099 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4100 ierr = PetscFree(work);CHKERRQ(ierr); 4101 if (n_vertices) { 4102 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4103 } 4104 if (n_constraints) { 4105 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4106 } 4107 /* Checking coarse_sub_mat and coarse basis functios */ 4108 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4109 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4110 if (pcbddc->dbg_flag) { 4111 Mat coarse_sub_mat; 4112 Mat AUXMAT,TM1,TM2,TM3,TM4; 4113 Mat coarse_phi_D,coarse_phi_B; 4114 Mat coarse_psi_D,coarse_psi_B; 4115 Mat A_II,A_BB,A_IB,A_BI; 4116 Mat C_B,CPHI; 4117 IS is_dummy; 4118 Vec mones; 4119 MatType checkmattype=MATSEQAIJ; 4120 PetscReal real_value; 4121 4122 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4123 Mat A; 4124 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4125 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4126 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4127 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4128 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4129 ierr = MatDestroy(&A);CHKERRQ(ierr); 4130 } else { 4131 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4132 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4133 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4134 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4135 } 4136 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4137 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4138 if (!pcbddc->symmetric_primal) { 4139 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4140 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4141 } 4142 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4143 4144 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4145 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4146 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4147 if (!pcbddc->symmetric_primal) { 4148 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4149 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4150 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4151 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4152 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4153 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4154 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4155 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4156 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4157 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4158 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4159 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4160 } else { 4161 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4162 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4163 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4164 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4165 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4166 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4167 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4168 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4169 } 4170 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4171 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4172 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4173 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4174 if (pcbddc->benign_n) { 4175 Mat B0_B,B0_BPHI; 4176 PetscScalar *data,*data2; 4177 PetscInt j; 4178 4179 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4180 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4181 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4182 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4183 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4184 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4185 for (j=0;j<pcbddc->benign_n;j++) { 4186 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4187 for (i=0;i<pcbddc->local_primal_size;i++) { 4188 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4189 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4190 } 4191 } 4192 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4193 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4194 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4195 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4196 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4197 } 4198 #if 0 4199 { 4200 PetscViewer viewer; 4201 char filename[256]; 4202 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4203 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4204 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4205 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4206 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4207 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4208 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4209 if (save_change) { 4210 Mat phi_B; 4211 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 4212 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 4213 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 4214 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 4215 } else { 4216 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4217 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4218 } 4219 if (pcbddc->coarse_phi_D) { 4220 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4221 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4222 } 4223 if (pcbddc->coarse_psi_B) { 4224 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4225 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4226 } 4227 if (pcbddc->coarse_psi_D) { 4228 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4229 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4230 } 4231 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4232 } 4233 #endif 4234 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4235 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4236 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4237 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4238 4239 /* check constraints */ 4240 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4241 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4242 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4243 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4244 } else { 4245 PetscScalar *data; 4246 Mat tmat; 4247 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4248 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4249 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4250 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4251 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4252 } 4253 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4254 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4255 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4256 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4257 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4258 if (!pcbddc->symmetric_primal) { 4259 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4260 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4261 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4262 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4263 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4264 } 4265 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4266 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4267 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4268 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4269 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4270 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4271 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4272 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4273 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4274 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4275 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4276 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4277 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4278 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4279 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4280 if (!pcbddc->symmetric_primal) { 4281 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4282 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4283 } 4284 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4285 } 4286 /* get back data */ 4287 *coarse_submat_vals_n = coarse_submat_vals; 4288 PetscFunctionReturn(0); 4289 } 4290 4291 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4292 { 4293 Mat *work_mat; 4294 IS isrow_s,iscol_s; 4295 PetscBool rsorted,csorted; 4296 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4297 PetscErrorCode ierr; 4298 4299 PetscFunctionBegin; 4300 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4301 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4302 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4303 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4304 4305 if (!rsorted) { 4306 const PetscInt *idxs; 4307 PetscInt *idxs_sorted,i; 4308 4309 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4310 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4311 for (i=0;i<rsize;i++) { 4312 idxs_perm_r[i] = i; 4313 } 4314 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4315 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4316 for (i=0;i<rsize;i++) { 4317 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4318 } 4319 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4320 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4321 } else { 4322 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4323 isrow_s = isrow; 4324 } 4325 4326 if (!csorted) { 4327 if (isrow == iscol) { 4328 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4329 iscol_s = isrow_s; 4330 } else { 4331 const PetscInt *idxs; 4332 PetscInt *idxs_sorted,i; 4333 4334 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4335 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4336 for (i=0;i<csize;i++) { 4337 idxs_perm_c[i] = i; 4338 } 4339 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4340 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4341 for (i=0;i<csize;i++) { 4342 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4343 } 4344 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4345 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4346 } 4347 } else { 4348 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4349 iscol_s = iscol; 4350 } 4351 4352 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4353 4354 if (!rsorted || !csorted) { 4355 Mat new_mat; 4356 IS is_perm_r,is_perm_c; 4357 4358 if (!rsorted) { 4359 PetscInt *idxs_r,i; 4360 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4361 for (i=0;i<rsize;i++) { 4362 idxs_r[idxs_perm_r[i]] = i; 4363 } 4364 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4365 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4366 } else { 4367 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4368 } 4369 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4370 4371 if (!csorted) { 4372 if (isrow_s == iscol_s) { 4373 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4374 is_perm_c = is_perm_r; 4375 } else { 4376 PetscInt *idxs_c,i; 4377 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4378 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4379 for (i=0;i<csize;i++) { 4380 idxs_c[idxs_perm_c[i]] = i; 4381 } 4382 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4383 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4384 } 4385 } else { 4386 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4387 } 4388 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4389 4390 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4391 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4392 work_mat[0] = new_mat; 4393 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4394 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4395 } 4396 4397 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4398 *B = work_mat[0]; 4399 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4400 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4401 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4402 PetscFunctionReturn(0); 4403 } 4404 4405 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4406 { 4407 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4408 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4409 Mat new_mat,lA; 4410 IS is_local,is_global; 4411 PetscInt local_size; 4412 PetscBool isseqaij; 4413 PetscErrorCode ierr; 4414 4415 PetscFunctionBegin; 4416 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4417 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4418 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4419 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4420 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4421 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4422 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4423 4424 /* check */ 4425 if (pcbddc->dbg_flag) { 4426 Vec x,x_change; 4427 PetscReal error; 4428 4429 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4430 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4431 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4432 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4433 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4434 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4435 if (!pcbddc->change_interior) { 4436 const PetscScalar *x,*y,*v; 4437 PetscReal lerror = 0.; 4438 PetscInt i; 4439 4440 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4441 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4442 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4443 for (i=0;i<local_size;i++) 4444 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4445 lerror = PetscAbsScalar(x[i]-y[i]); 4446 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4447 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4448 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4449 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4450 if (error > PETSC_SMALL) { 4451 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4452 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4453 } else { 4454 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4455 } 4456 } 4457 } 4458 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4459 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4460 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4461 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4462 if (error > PETSC_SMALL) { 4463 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4464 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4465 } else { 4466 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4467 } 4468 } 4469 ierr = VecDestroy(&x);CHKERRQ(ierr); 4470 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4471 } 4472 4473 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4474 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4475 4476 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4477 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4478 if (isseqaij) { 4479 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4480 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4481 if (lA) { 4482 Mat work; 4483 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4484 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4485 ierr = MatDestroy(&work);CHKERRQ(ierr); 4486 } 4487 } else { 4488 Mat work_mat; 4489 4490 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4491 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4492 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4493 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4494 if (lA) { 4495 Mat work; 4496 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4497 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4498 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4499 ierr = MatDestroy(&work);CHKERRQ(ierr); 4500 } 4501 } 4502 if (matis->A->symmetric_set) { 4503 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4504 #if !defined(PETSC_USE_COMPLEX) 4505 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4506 #endif 4507 } 4508 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4509 PetscFunctionReturn(0); 4510 } 4511 4512 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4513 { 4514 PC_IS* pcis = (PC_IS*)(pc->data); 4515 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4516 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4517 PetscInt *idx_R_local=NULL; 4518 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4519 PetscInt vbs,bs; 4520 PetscBT bitmask=NULL; 4521 PetscErrorCode ierr; 4522 4523 PetscFunctionBegin; 4524 /* 4525 No need to setup local scatters if 4526 - primal space is unchanged 4527 AND 4528 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4529 AND 4530 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4531 */ 4532 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4533 PetscFunctionReturn(0); 4534 } 4535 /* destroy old objects */ 4536 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4537 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4538 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4539 /* Set Non-overlapping dimensions */ 4540 n_B = pcis->n_B; 4541 n_D = pcis->n - n_B; 4542 n_vertices = pcbddc->n_vertices; 4543 4544 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4545 4546 /* create auxiliary bitmask and allocate workspace */ 4547 if (!sub_schurs || !sub_schurs->reuse_solver) { 4548 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4549 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4550 for (i=0;i<n_vertices;i++) { 4551 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4552 } 4553 4554 for (i=0, n_R=0; i<pcis->n; i++) { 4555 if (!PetscBTLookup(bitmask,i)) { 4556 idx_R_local[n_R++] = i; 4557 } 4558 } 4559 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4560 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4561 4562 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4563 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4564 } 4565 4566 /* Block code */ 4567 vbs = 1; 4568 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4569 if (bs>1 && !(n_vertices%bs)) { 4570 PetscBool is_blocked = PETSC_TRUE; 4571 PetscInt *vary; 4572 if (!sub_schurs || !sub_schurs->reuse_solver) { 4573 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4574 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4575 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4576 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4577 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4578 for (i=0; i<pcis->n/bs; i++) { 4579 if (vary[i]!=0 && vary[i]!=bs) { 4580 is_blocked = PETSC_FALSE; 4581 break; 4582 } 4583 } 4584 ierr = PetscFree(vary);CHKERRQ(ierr); 4585 } else { 4586 /* Verify directly the R set */ 4587 for (i=0; i<n_R/bs; i++) { 4588 PetscInt j,node=idx_R_local[bs*i]; 4589 for (j=1; j<bs; j++) { 4590 if (node != idx_R_local[bs*i+j]-j) { 4591 is_blocked = PETSC_FALSE; 4592 break; 4593 } 4594 } 4595 } 4596 } 4597 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4598 vbs = bs; 4599 for (i=0;i<n_R/vbs;i++) { 4600 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4601 } 4602 } 4603 } 4604 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4605 if (sub_schurs && sub_schurs->reuse_solver) { 4606 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4607 4608 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4609 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4610 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4611 reuse_solver->is_R = pcbddc->is_R_local; 4612 } else { 4613 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4614 } 4615 4616 /* print some info if requested */ 4617 if (pcbddc->dbg_flag) { 4618 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4619 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4620 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4623 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4624 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4625 } 4626 4627 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4628 if (!sub_schurs || !sub_schurs->reuse_solver) { 4629 IS is_aux1,is_aux2; 4630 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4631 4632 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4633 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4634 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4635 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4636 for (i=0; i<n_D; i++) { 4637 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4638 } 4639 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4640 for (i=0, j=0; i<n_R; i++) { 4641 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4642 aux_array1[j++] = i; 4643 } 4644 } 4645 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4646 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4647 for (i=0, j=0; i<n_B; i++) { 4648 if (!PetscBTLookup(bitmask,is_indices[i])) { 4649 aux_array2[j++] = i; 4650 } 4651 } 4652 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4653 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4654 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4655 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4656 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4657 4658 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4659 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4660 for (i=0, j=0; i<n_R; i++) { 4661 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4662 aux_array1[j++] = i; 4663 } 4664 } 4665 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4666 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4667 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4668 } 4669 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4670 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4671 } else { 4672 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4673 IS tis; 4674 PetscInt schur_size; 4675 4676 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4677 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4678 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4679 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4680 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4681 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4682 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4683 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4684 } 4685 } 4686 PetscFunctionReturn(0); 4687 } 4688 4689 4690 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4691 { 4692 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4693 PC_IS *pcis = (PC_IS*)pc->data; 4694 PC pc_temp; 4695 Mat A_RR; 4696 MatReuse reuse; 4697 PetscScalar m_one = -1.0; 4698 PetscReal value; 4699 PetscInt n_D,n_R; 4700 PetscBool check_corr[2],issbaij; 4701 PetscErrorCode ierr; 4702 /* prefixes stuff */ 4703 char dir_prefix[256],neu_prefix[256],str_level[16]; 4704 size_t len; 4705 4706 PetscFunctionBegin; 4707 4708 /* compute prefixes */ 4709 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4710 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4711 if (!pcbddc->current_level) { 4712 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4713 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4714 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4715 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4716 } else { 4717 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4718 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4719 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4720 len -= 15; /* remove "pc_bddc_coarse_" */ 4721 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4722 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4723 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4724 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4725 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4726 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4727 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4728 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4729 } 4730 4731 /* DIRICHLET PROBLEM */ 4732 if (dirichlet) { 4733 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4734 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4735 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4736 if (pcbddc->dbg_flag) { 4737 Mat A_IIn; 4738 4739 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4740 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4741 pcis->A_II = A_IIn; 4742 } 4743 } 4744 if (pcbddc->local_mat->symmetric_set) { 4745 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4746 } 4747 /* Matrix for Dirichlet problem is pcis->A_II */ 4748 n_D = pcis->n - pcis->n_B; 4749 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4750 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4751 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4752 /* default */ 4753 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4754 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4755 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4756 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4757 if (issbaij) { 4758 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4759 } else { 4760 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4761 } 4762 /* Allow user's customization */ 4763 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4764 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4765 } 4766 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4767 if (sub_schurs && sub_schurs->reuse_solver) { 4768 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4769 4770 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4771 } 4772 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4773 if (!n_D) { 4774 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4775 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4776 } 4777 /* Set Up KSP for Dirichlet problem of BDDC */ 4778 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4779 /* set ksp_D into pcis data */ 4780 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4781 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4782 pcis->ksp_D = pcbddc->ksp_D; 4783 } 4784 4785 /* NEUMANN PROBLEM */ 4786 A_RR = 0; 4787 if (neumann) { 4788 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4789 PetscInt ibs,mbs; 4790 PetscBool issbaij; 4791 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4792 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4793 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4794 if (pcbddc->ksp_R) { /* already created ksp */ 4795 PetscInt nn_R; 4796 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4797 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4798 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4799 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4800 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4801 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4802 reuse = MAT_INITIAL_MATRIX; 4803 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4804 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4805 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4806 reuse = MAT_INITIAL_MATRIX; 4807 } else { /* safe to reuse the matrix */ 4808 reuse = MAT_REUSE_MATRIX; 4809 } 4810 } 4811 /* last check */ 4812 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4813 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4814 reuse = MAT_INITIAL_MATRIX; 4815 } 4816 } else { /* first time, so we need to create the matrix */ 4817 reuse = MAT_INITIAL_MATRIX; 4818 } 4819 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4820 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4821 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4822 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4823 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4824 if (matis->A == pcbddc->local_mat) { 4825 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4826 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4827 } else { 4828 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4829 } 4830 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4831 if (matis->A == pcbddc->local_mat) { 4832 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4833 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4834 } else { 4835 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4836 } 4837 } 4838 /* extract A_RR */ 4839 if (sub_schurs && sub_schurs->reuse_solver) { 4840 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4841 4842 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4843 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4844 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4845 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4846 } else { 4847 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4848 } 4849 } else { 4850 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4851 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4852 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4853 } 4854 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4855 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4856 } 4857 if (pcbddc->local_mat->symmetric_set) { 4858 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4859 } 4860 if (!pcbddc->ksp_R) { /* create object if not present */ 4861 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4862 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4863 /* default */ 4864 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4865 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4866 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4867 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4868 if (issbaij) { 4869 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4870 } else { 4871 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4872 } 4873 /* Allow user's customization */ 4874 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4875 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4876 } 4877 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4878 if (!n_R) { 4879 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4880 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4881 } 4882 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4883 /* Reuse solver if it is present */ 4884 if (sub_schurs && sub_schurs->reuse_solver) { 4885 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4886 4887 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4888 } 4889 /* Set Up KSP for Neumann problem of BDDC */ 4890 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4891 } 4892 4893 if (pcbddc->dbg_flag) { 4894 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4895 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4896 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4897 } 4898 4899 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4900 check_corr[0] = check_corr[1] = PETSC_FALSE; 4901 if (pcbddc->NullSpace_corr[0]) { 4902 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4903 } 4904 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4905 check_corr[0] = PETSC_TRUE; 4906 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4907 } 4908 if (neumann && pcbddc->NullSpace_corr[2]) { 4909 check_corr[1] = PETSC_TRUE; 4910 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4911 } 4912 4913 /* check Dirichlet and Neumann solvers */ 4914 if (pcbddc->dbg_flag) { 4915 if (dirichlet) { /* Dirichlet */ 4916 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4917 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4918 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4919 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4920 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4921 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4922 if (check_corr[0]) { 4923 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4924 } 4925 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4926 } 4927 if (neumann) { /* Neumann */ 4928 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4929 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4930 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4931 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4932 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4933 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4934 if (check_corr[1]) { 4935 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4936 } 4937 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4938 } 4939 } 4940 /* free Neumann problem's matrix */ 4941 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4942 PetscFunctionReturn(0); 4943 } 4944 4945 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4946 { 4947 PetscErrorCode ierr; 4948 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4949 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4950 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4951 4952 PetscFunctionBegin; 4953 if (!reuse_solver) { 4954 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4955 } 4956 if (!pcbddc->switch_static) { 4957 if (applytranspose && pcbddc->local_auxmat1) { 4958 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4959 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4960 } 4961 if (!reuse_solver) { 4962 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4963 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4964 } else { 4965 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4966 4967 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4968 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4969 } 4970 } else { 4971 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4972 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4973 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4974 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4975 if (applytranspose && pcbddc->local_auxmat1) { 4976 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4977 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4978 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4979 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4980 } 4981 } 4982 if (!reuse_solver || pcbddc->switch_static) { 4983 if (applytranspose) { 4984 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4985 } else { 4986 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4987 } 4988 } else { 4989 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4990 4991 if (applytranspose) { 4992 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4993 } else { 4994 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4995 } 4996 } 4997 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4998 if (!pcbddc->switch_static) { 4999 if (!reuse_solver) { 5000 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5001 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5002 } else { 5003 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5004 5005 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5006 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5007 } 5008 if (!applytranspose && pcbddc->local_auxmat1) { 5009 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5010 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5011 } 5012 } else { 5013 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5014 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5015 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5016 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5017 if (!applytranspose && pcbddc->local_auxmat1) { 5018 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5019 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5020 } 5021 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5022 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5023 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5024 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5025 } 5026 PetscFunctionReturn(0); 5027 } 5028 5029 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5030 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5031 { 5032 PetscErrorCode ierr; 5033 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5034 PC_IS* pcis = (PC_IS*) (pc->data); 5035 const PetscScalar zero = 0.0; 5036 5037 PetscFunctionBegin; 5038 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5039 if (!pcbddc->benign_apply_coarse_only) { 5040 if (applytranspose) { 5041 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5042 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5043 } else { 5044 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5045 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5046 } 5047 } else { 5048 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5049 } 5050 5051 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5052 if (pcbddc->benign_n) { 5053 PetscScalar *array; 5054 PetscInt j; 5055 5056 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5057 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5058 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5059 } 5060 5061 /* start communications from local primal nodes to rhs of coarse solver */ 5062 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5063 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5064 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5065 5066 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5067 if (pcbddc->coarse_ksp) { 5068 Mat coarse_mat; 5069 Vec rhs,sol; 5070 MatNullSpace nullsp; 5071 PetscBool isbddc = PETSC_FALSE; 5072 5073 if (pcbddc->benign_have_null) { 5074 PC coarse_pc; 5075 5076 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5077 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5078 /* we need to propagate to coarser levels the need for a possible benign correction */ 5079 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5080 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5081 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5082 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5083 } 5084 } 5085 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5086 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5087 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5088 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5089 if (nullsp) { 5090 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5091 } 5092 if (applytranspose) { 5093 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5094 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5095 } else { 5096 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5097 PC coarse_pc; 5098 5099 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5100 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5101 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5102 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5103 } else { 5104 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5105 } 5106 } 5107 /* we don't need the benign correction at coarser levels anymore */ 5108 if (pcbddc->benign_have_null && isbddc) { 5109 PC coarse_pc; 5110 PC_BDDC* coarsepcbddc; 5111 5112 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5113 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5114 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5115 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5116 } 5117 if (nullsp) { 5118 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5119 } 5120 } 5121 5122 /* Local solution on R nodes */ 5123 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5124 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5125 } 5126 /* communications from coarse sol to local primal nodes */ 5127 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5128 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5129 5130 /* Sum contributions from the two levels */ 5131 if (!pcbddc->benign_apply_coarse_only) { 5132 if (applytranspose) { 5133 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5134 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5135 } else { 5136 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5137 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5138 } 5139 /* store p0 */ 5140 if (pcbddc->benign_n) { 5141 PetscScalar *array; 5142 PetscInt j; 5143 5144 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5145 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5146 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5147 } 5148 } else { /* expand the coarse solution */ 5149 if (applytranspose) { 5150 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5151 } else { 5152 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5153 } 5154 } 5155 PetscFunctionReturn(0); 5156 } 5157 5158 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5159 { 5160 PetscErrorCode ierr; 5161 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5162 PetscScalar *array; 5163 Vec from,to; 5164 5165 PetscFunctionBegin; 5166 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5167 from = pcbddc->coarse_vec; 5168 to = pcbddc->vec1_P; 5169 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5170 Vec tvec; 5171 5172 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5173 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5174 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5175 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5176 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5177 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5178 } 5179 } else { /* from local to global -> put data in coarse right hand side */ 5180 from = pcbddc->vec1_P; 5181 to = pcbddc->coarse_vec; 5182 } 5183 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5184 PetscFunctionReturn(0); 5185 } 5186 5187 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5188 { 5189 PetscErrorCode ierr; 5190 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5191 PetscScalar *array; 5192 Vec from,to; 5193 5194 PetscFunctionBegin; 5195 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5196 from = pcbddc->coarse_vec; 5197 to = pcbddc->vec1_P; 5198 } else { /* from local to global -> put data in coarse right hand side */ 5199 from = pcbddc->vec1_P; 5200 to = pcbddc->coarse_vec; 5201 } 5202 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5203 if (smode == SCATTER_FORWARD) { 5204 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5205 Vec tvec; 5206 5207 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5208 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5209 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5210 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5211 } 5212 } else { 5213 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5214 ierr = VecResetArray(from);CHKERRQ(ierr); 5215 } 5216 } 5217 PetscFunctionReturn(0); 5218 } 5219 5220 /* uncomment for testing purposes */ 5221 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5222 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5223 { 5224 PetscErrorCode ierr; 5225 PC_IS* pcis = (PC_IS*)(pc->data); 5226 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5227 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5228 /* one and zero */ 5229 PetscScalar one=1.0,zero=0.0; 5230 /* space to store constraints and their local indices */ 5231 PetscScalar *constraints_data; 5232 PetscInt *constraints_idxs,*constraints_idxs_B; 5233 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5234 PetscInt *constraints_n; 5235 /* iterators */ 5236 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5237 /* BLAS integers */ 5238 PetscBLASInt lwork,lierr; 5239 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5240 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5241 /* reuse */ 5242 PetscInt olocal_primal_size,olocal_primal_size_cc; 5243 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5244 /* change of basis */ 5245 PetscBool qr_needed; 5246 PetscBT change_basis,qr_needed_idx; 5247 /* auxiliary stuff */ 5248 PetscInt *nnz,*is_indices; 5249 PetscInt ncc; 5250 /* some quantities */ 5251 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5252 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5253 5254 PetscFunctionBegin; 5255 /* Destroy Mat objects computed previously */ 5256 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5257 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5258 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5259 /* save info on constraints from previous setup (if any) */ 5260 olocal_primal_size = pcbddc->local_primal_size; 5261 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5262 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5263 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5264 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5265 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5266 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5267 5268 if (!pcbddc->adaptive_selection) { 5269 IS ISForVertices,*ISForFaces,*ISForEdges; 5270 MatNullSpace nearnullsp; 5271 const Vec *nearnullvecs; 5272 Vec *localnearnullsp; 5273 PetscScalar *array; 5274 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5275 PetscBool nnsp_has_cnst; 5276 /* LAPACK working arrays for SVD or POD */ 5277 PetscBool skip_lapack,boolforchange; 5278 PetscScalar *work; 5279 PetscReal *singular_vals; 5280 #if defined(PETSC_USE_COMPLEX) 5281 PetscReal *rwork; 5282 #endif 5283 #if defined(PETSC_MISSING_LAPACK_GESVD) 5284 PetscScalar *temp_basis,*correlation_mat; 5285 #else 5286 PetscBLASInt dummy_int=1; 5287 PetscScalar dummy_scalar=1.; 5288 #endif 5289 5290 /* Get index sets for faces, edges and vertices from graph */ 5291 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5292 /* print some info */ 5293 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5294 PetscInt nv; 5295 5296 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5297 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5298 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5299 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5300 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5301 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5302 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5303 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5304 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5305 } 5306 5307 /* free unneeded index sets */ 5308 if (!pcbddc->use_vertices) { 5309 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5310 } 5311 if (!pcbddc->use_edges) { 5312 for (i=0;i<n_ISForEdges;i++) { 5313 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5314 } 5315 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5316 n_ISForEdges = 0; 5317 } 5318 if (!pcbddc->use_faces) { 5319 for (i=0;i<n_ISForFaces;i++) { 5320 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5321 } 5322 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5323 n_ISForFaces = 0; 5324 } 5325 5326 /* check if near null space is attached to global mat */ 5327 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5328 if (nearnullsp) { 5329 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5330 /* remove any stored info */ 5331 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5332 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5333 /* store information for BDDC solver reuse */ 5334 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5335 pcbddc->onearnullspace = nearnullsp; 5336 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5337 for (i=0;i<nnsp_size;i++) { 5338 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5339 } 5340 } else { /* if near null space is not provided BDDC uses constants by default */ 5341 nnsp_size = 0; 5342 nnsp_has_cnst = PETSC_TRUE; 5343 } 5344 /* get max number of constraints on a single cc */ 5345 max_constraints = nnsp_size; 5346 if (nnsp_has_cnst) max_constraints++; 5347 5348 /* 5349 Evaluate maximum storage size needed by the procedure 5350 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5351 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5352 There can be multiple constraints per connected component 5353 */ 5354 n_vertices = 0; 5355 if (ISForVertices) { 5356 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5357 } 5358 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5359 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5360 5361 total_counts = n_ISForFaces+n_ISForEdges; 5362 total_counts *= max_constraints; 5363 total_counts += n_vertices; 5364 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5365 5366 total_counts = 0; 5367 max_size_of_constraint = 0; 5368 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5369 IS used_is; 5370 if (i<n_ISForEdges) { 5371 used_is = ISForEdges[i]; 5372 } else { 5373 used_is = ISForFaces[i-n_ISForEdges]; 5374 } 5375 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5376 total_counts += j; 5377 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5378 } 5379 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5380 5381 /* get local part of global near null space vectors */ 5382 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5383 for (k=0;k<nnsp_size;k++) { 5384 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5385 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5386 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5387 } 5388 5389 /* whether or not to skip lapack calls */ 5390 skip_lapack = PETSC_TRUE; 5391 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5392 5393 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5394 if (!skip_lapack) { 5395 PetscScalar temp_work; 5396 5397 #if defined(PETSC_MISSING_LAPACK_GESVD) 5398 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5399 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5400 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5401 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5402 #if defined(PETSC_USE_COMPLEX) 5403 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5404 #endif 5405 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5406 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5407 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5408 lwork = -1; 5409 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5410 #if !defined(PETSC_USE_COMPLEX) 5411 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5412 #else 5413 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5414 #endif 5415 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5416 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5417 #else /* on missing GESVD */ 5418 /* SVD */ 5419 PetscInt max_n,min_n; 5420 max_n = max_size_of_constraint; 5421 min_n = max_constraints; 5422 if (max_size_of_constraint < max_constraints) { 5423 min_n = max_size_of_constraint; 5424 max_n = max_constraints; 5425 } 5426 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5427 #if defined(PETSC_USE_COMPLEX) 5428 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5429 #endif 5430 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5431 lwork = -1; 5432 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5433 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5434 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5435 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5436 #if !defined(PETSC_USE_COMPLEX) 5437 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5438 #else 5439 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5440 #endif 5441 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5442 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5443 #endif /* on missing GESVD */ 5444 /* Allocate optimal workspace */ 5445 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5446 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5447 } 5448 /* Now we can loop on constraining sets */ 5449 total_counts = 0; 5450 constraints_idxs_ptr[0] = 0; 5451 constraints_data_ptr[0] = 0; 5452 /* vertices */ 5453 if (n_vertices) { 5454 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5455 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5456 for (i=0;i<n_vertices;i++) { 5457 constraints_n[total_counts] = 1; 5458 constraints_data[total_counts] = 1.0; 5459 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5460 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5461 total_counts++; 5462 } 5463 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5464 n_vertices = total_counts; 5465 } 5466 5467 /* edges and faces */ 5468 total_counts_cc = total_counts; 5469 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5470 IS used_is; 5471 PetscBool idxs_copied = PETSC_FALSE; 5472 5473 if (ncc<n_ISForEdges) { 5474 used_is = ISForEdges[ncc]; 5475 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5476 } else { 5477 used_is = ISForFaces[ncc-n_ISForEdges]; 5478 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5479 } 5480 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5481 5482 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5483 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5484 /* change of basis should not be performed on local periodic nodes */ 5485 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5486 if (nnsp_has_cnst) { 5487 PetscScalar quad_value; 5488 5489 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5490 idxs_copied = PETSC_TRUE; 5491 5492 if (!pcbddc->use_nnsp_true) { 5493 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5494 } else { 5495 quad_value = 1.0; 5496 } 5497 for (j=0;j<size_of_constraint;j++) { 5498 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5499 } 5500 temp_constraints++; 5501 total_counts++; 5502 } 5503 for (k=0;k<nnsp_size;k++) { 5504 PetscReal real_value; 5505 PetscScalar *ptr_to_data; 5506 5507 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5508 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5509 for (j=0;j<size_of_constraint;j++) { 5510 ptr_to_data[j] = array[is_indices[j]]; 5511 } 5512 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5513 /* check if array is null on the connected component */ 5514 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5515 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5516 if (real_value > 0.0) { /* keep indices and values */ 5517 temp_constraints++; 5518 total_counts++; 5519 if (!idxs_copied) { 5520 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5521 idxs_copied = PETSC_TRUE; 5522 } 5523 } 5524 } 5525 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5526 valid_constraints = temp_constraints; 5527 if (!pcbddc->use_nnsp_true && temp_constraints) { 5528 if (temp_constraints == 1) { /* just normalize the constraint */ 5529 PetscScalar norm,*ptr_to_data; 5530 5531 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5532 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5533 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5534 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5535 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5536 } else { /* perform SVD */ 5537 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5538 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5539 5540 #if defined(PETSC_MISSING_LAPACK_GESVD) 5541 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5542 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5543 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5544 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5545 from that computed using LAPACKgesvd 5546 -> This is due to a different computation of eigenvectors in LAPACKheev 5547 -> The quality of the POD-computed basis will be the same */ 5548 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5549 /* Store upper triangular part of correlation matrix */ 5550 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5551 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5552 for (j=0;j<temp_constraints;j++) { 5553 for (k=0;k<j+1;k++) { 5554 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5555 } 5556 } 5557 /* compute eigenvalues and eigenvectors of correlation matrix */ 5558 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5559 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5560 #if !defined(PETSC_USE_COMPLEX) 5561 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5562 #else 5563 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5564 #endif 5565 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5566 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5567 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5568 j = 0; 5569 while (j < temp_constraints && singular_vals[j] < tol) j++; 5570 total_counts = total_counts-j; 5571 valid_constraints = temp_constraints-j; 5572 /* scale and copy POD basis into used quadrature memory */ 5573 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5574 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5575 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5576 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5577 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5578 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5579 if (j<temp_constraints) { 5580 PetscInt ii; 5581 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5582 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5583 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5584 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5585 for (k=0;k<temp_constraints-j;k++) { 5586 for (ii=0;ii<size_of_constraint;ii++) { 5587 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5588 } 5589 } 5590 } 5591 #else /* on missing GESVD */ 5592 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5593 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5594 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5595 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5596 #if !defined(PETSC_USE_COMPLEX) 5597 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5598 #else 5599 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5600 #endif 5601 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5602 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5603 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5604 k = temp_constraints; 5605 if (k > size_of_constraint) k = size_of_constraint; 5606 j = 0; 5607 while (j < k && singular_vals[k-j-1] < tol) j++; 5608 valid_constraints = k-j; 5609 total_counts = total_counts-temp_constraints+valid_constraints; 5610 #endif /* on missing GESVD */ 5611 } 5612 } 5613 /* update pointers information */ 5614 if (valid_constraints) { 5615 constraints_n[total_counts_cc] = valid_constraints; 5616 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5617 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5618 /* set change_of_basis flag */ 5619 if (boolforchange) { 5620 PetscBTSet(change_basis,total_counts_cc); 5621 } 5622 total_counts_cc++; 5623 } 5624 } 5625 /* free workspace */ 5626 if (!skip_lapack) { 5627 ierr = PetscFree(work);CHKERRQ(ierr); 5628 #if defined(PETSC_USE_COMPLEX) 5629 ierr = PetscFree(rwork);CHKERRQ(ierr); 5630 #endif 5631 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5632 #if defined(PETSC_MISSING_LAPACK_GESVD) 5633 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5634 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5635 #endif 5636 } 5637 for (k=0;k<nnsp_size;k++) { 5638 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5639 } 5640 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5641 /* free index sets of faces, edges and vertices */ 5642 for (i=0;i<n_ISForFaces;i++) { 5643 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5644 } 5645 if (n_ISForFaces) { 5646 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5647 } 5648 for (i=0;i<n_ISForEdges;i++) { 5649 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5650 } 5651 if (n_ISForEdges) { 5652 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5653 } 5654 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5655 } else { 5656 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5657 5658 total_counts = 0; 5659 n_vertices = 0; 5660 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5661 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5662 } 5663 max_constraints = 0; 5664 total_counts_cc = 0; 5665 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5666 total_counts += pcbddc->adaptive_constraints_n[i]; 5667 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5668 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5669 } 5670 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5671 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5672 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5673 constraints_data = pcbddc->adaptive_constraints_data; 5674 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5675 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5676 total_counts_cc = 0; 5677 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5678 if (pcbddc->adaptive_constraints_n[i]) { 5679 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5680 } 5681 } 5682 #if 0 5683 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5684 for (i=0;i<total_counts_cc;i++) { 5685 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5686 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5687 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5688 printf(" %d",constraints_idxs[j]); 5689 } 5690 printf("\n"); 5691 printf("number of cc: %d\n",constraints_n[i]); 5692 } 5693 for (i=0;i<n_vertices;i++) { 5694 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5695 } 5696 for (i=0;i<sub_schurs->n_subs;i++) { 5697 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5698 } 5699 #endif 5700 5701 max_size_of_constraint = 0; 5702 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5703 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5704 /* Change of basis */ 5705 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5706 if (pcbddc->use_change_of_basis) { 5707 for (i=0;i<sub_schurs->n_subs;i++) { 5708 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5709 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5710 } 5711 } 5712 } 5713 } 5714 pcbddc->local_primal_size = total_counts; 5715 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5716 5717 /* map constraints_idxs in boundary numbering */ 5718 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5719 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5720 5721 /* Create constraint matrix */ 5722 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5723 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5724 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5725 5726 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5727 /* determine if a QR strategy is needed for change of basis */ 5728 qr_needed = PETSC_FALSE; 5729 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5730 total_primal_vertices=0; 5731 pcbddc->local_primal_size_cc = 0; 5732 for (i=0;i<total_counts_cc;i++) { 5733 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5734 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5735 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5736 pcbddc->local_primal_size_cc += 1; 5737 } else if (PetscBTLookup(change_basis,i)) { 5738 for (k=0;k<constraints_n[i];k++) { 5739 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5740 } 5741 pcbddc->local_primal_size_cc += constraints_n[i]; 5742 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5743 PetscBTSet(qr_needed_idx,i); 5744 qr_needed = PETSC_TRUE; 5745 } 5746 } else { 5747 pcbddc->local_primal_size_cc += 1; 5748 } 5749 } 5750 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5751 pcbddc->n_vertices = total_primal_vertices; 5752 /* permute indices in order to have a sorted set of vertices */ 5753 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5754 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5755 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5756 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5757 5758 /* nonzero structure of constraint matrix */ 5759 /* and get reference dof for local constraints */ 5760 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5761 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5762 5763 j = total_primal_vertices; 5764 total_counts = total_primal_vertices; 5765 cum = total_primal_vertices; 5766 for (i=n_vertices;i<total_counts_cc;i++) { 5767 if (!PetscBTLookup(change_basis,i)) { 5768 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5769 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5770 cum++; 5771 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5772 for (k=0;k<constraints_n[i];k++) { 5773 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5774 nnz[j+k] = size_of_constraint; 5775 } 5776 j += constraints_n[i]; 5777 } 5778 } 5779 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5780 ierr = PetscFree(nnz);CHKERRQ(ierr); 5781 5782 /* set values in constraint matrix */ 5783 for (i=0;i<total_primal_vertices;i++) { 5784 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5785 } 5786 total_counts = total_primal_vertices; 5787 for (i=n_vertices;i<total_counts_cc;i++) { 5788 if (!PetscBTLookup(change_basis,i)) { 5789 PetscInt *cols; 5790 5791 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5792 cols = constraints_idxs+constraints_idxs_ptr[i]; 5793 for (k=0;k<constraints_n[i];k++) { 5794 PetscInt row = total_counts+k; 5795 PetscScalar *vals; 5796 5797 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5798 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5799 } 5800 total_counts += constraints_n[i]; 5801 } 5802 } 5803 /* assembling */ 5804 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5805 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5806 5807 /* 5808 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5809 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5810 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5811 */ 5812 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5813 if (pcbddc->use_change_of_basis) { 5814 /* dual and primal dofs on a single cc */ 5815 PetscInt dual_dofs,primal_dofs; 5816 /* working stuff for GEQRF */ 5817 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5818 PetscBLASInt lqr_work; 5819 /* working stuff for UNGQR */ 5820 PetscScalar *gqr_work,lgqr_work_t; 5821 PetscBLASInt lgqr_work; 5822 /* working stuff for TRTRS */ 5823 PetscScalar *trs_rhs; 5824 PetscBLASInt Blas_NRHS; 5825 /* pointers for values insertion into change of basis matrix */ 5826 PetscInt *start_rows,*start_cols; 5827 PetscScalar *start_vals; 5828 /* working stuff for values insertion */ 5829 PetscBT is_primal; 5830 PetscInt *aux_primal_numbering_B; 5831 /* matrix sizes */ 5832 PetscInt global_size,local_size; 5833 /* temporary change of basis */ 5834 Mat localChangeOfBasisMatrix; 5835 /* extra space for debugging */ 5836 PetscScalar *dbg_work; 5837 5838 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5839 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5840 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5841 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5842 /* nonzeros for local mat */ 5843 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5844 if (!pcbddc->benign_change || pcbddc->fake_change) { 5845 for (i=0;i<pcis->n;i++) nnz[i]=1; 5846 } else { 5847 const PetscInt *ii; 5848 PetscInt n; 5849 PetscBool flg_row; 5850 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5851 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5852 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5853 } 5854 for (i=n_vertices;i<total_counts_cc;i++) { 5855 if (PetscBTLookup(change_basis,i)) { 5856 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5857 if (PetscBTLookup(qr_needed_idx,i)) { 5858 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5859 } else { 5860 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5861 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5862 } 5863 } 5864 } 5865 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5866 ierr = PetscFree(nnz);CHKERRQ(ierr); 5867 /* Set interior change in the matrix */ 5868 if (!pcbddc->benign_change || pcbddc->fake_change) { 5869 for (i=0;i<pcis->n;i++) { 5870 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5871 } 5872 } else { 5873 const PetscInt *ii,*jj; 5874 PetscScalar *aa; 5875 PetscInt n; 5876 PetscBool flg_row; 5877 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5878 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5879 for (i=0;i<n;i++) { 5880 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5881 } 5882 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5883 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5884 } 5885 5886 if (pcbddc->dbg_flag) { 5887 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5888 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5889 } 5890 5891 5892 /* Now we loop on the constraints which need a change of basis */ 5893 /* 5894 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5895 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5896 5897 Basic blocks of change of basis matrix T computed by 5898 5899 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5900 5901 | 1 0 ... 0 s_1/S | 5902 | 0 1 ... 0 s_2/S | 5903 | ... | 5904 | 0 ... 1 s_{n-1}/S | 5905 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5906 5907 with S = \sum_{i=1}^n s_i^2 5908 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5909 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5910 5911 - QR decomposition of constraints otherwise 5912 */ 5913 if (qr_needed) { 5914 /* space to store Q */ 5915 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5916 /* array to store scaling factors for reflectors */ 5917 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5918 /* first we issue queries for optimal work */ 5919 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5920 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5921 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5922 lqr_work = -1; 5923 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5924 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5925 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5926 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5927 lgqr_work = -1; 5928 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5929 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5930 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5931 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5932 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5933 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5934 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5935 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5936 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5937 /* array to store rhs and solution of triangular solver */ 5938 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5939 /* allocating workspace for check */ 5940 if (pcbddc->dbg_flag) { 5941 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5942 } 5943 } 5944 /* array to store whether a node is primal or not */ 5945 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5946 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5947 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5948 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5949 for (i=0;i<total_primal_vertices;i++) { 5950 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5951 } 5952 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5953 5954 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5955 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5956 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5957 if (PetscBTLookup(change_basis,total_counts)) { 5958 /* get constraint info */ 5959 primal_dofs = constraints_n[total_counts]; 5960 dual_dofs = size_of_constraint-primal_dofs; 5961 5962 if (pcbddc->dbg_flag) { 5963 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5964 } 5965 5966 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5967 5968 /* copy quadrature constraints for change of basis check */ 5969 if (pcbddc->dbg_flag) { 5970 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5971 } 5972 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5973 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5974 5975 /* compute QR decomposition of constraints */ 5976 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5977 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5978 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5979 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5980 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5981 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5982 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5983 5984 /* explictly compute R^-T */ 5985 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5986 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5987 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5988 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5989 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5990 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5991 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5992 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5993 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5994 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5995 5996 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5997 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5998 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5999 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6000 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6001 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6002 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6003 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 6004 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6005 6006 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6007 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6008 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6009 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6010 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6011 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6012 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6013 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6014 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6015 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6016 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6017 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6018 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6019 6020 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6021 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6022 /* insert cols for primal dofs */ 6023 for (j=0;j<primal_dofs;j++) { 6024 start_vals = &qr_basis[j*size_of_constraint]; 6025 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6026 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6027 } 6028 /* insert cols for dual dofs */ 6029 for (j=0,k=0;j<dual_dofs;k++) { 6030 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6031 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6032 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6033 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6034 j++; 6035 } 6036 } 6037 6038 /* check change of basis */ 6039 if (pcbddc->dbg_flag) { 6040 PetscInt ii,jj; 6041 PetscBool valid_qr=PETSC_TRUE; 6042 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6043 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6044 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6045 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6046 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6047 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6048 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6049 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6050 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6051 for (jj=0;jj<size_of_constraint;jj++) { 6052 for (ii=0;ii<primal_dofs;ii++) { 6053 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6054 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 6055 } 6056 } 6057 if (!valid_qr) { 6058 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6059 for (jj=0;jj<size_of_constraint;jj++) { 6060 for (ii=0;ii<primal_dofs;ii++) { 6061 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6062 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6063 } 6064 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 6065 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6066 } 6067 } 6068 } 6069 } else { 6070 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6071 } 6072 } 6073 } else { /* simple transformation block */ 6074 PetscInt row,col; 6075 PetscScalar val,norm; 6076 6077 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6078 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6079 for (j=0;j<size_of_constraint;j++) { 6080 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6081 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6082 if (!PetscBTLookup(is_primal,row_B)) { 6083 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6084 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6085 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6086 } else { 6087 for (k=0;k<size_of_constraint;k++) { 6088 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6089 if (row != col) { 6090 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6091 } else { 6092 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6093 } 6094 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6095 } 6096 } 6097 } 6098 if (pcbddc->dbg_flag) { 6099 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6100 } 6101 } 6102 } else { 6103 if (pcbddc->dbg_flag) { 6104 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6105 } 6106 } 6107 } 6108 6109 /* free workspace */ 6110 if (qr_needed) { 6111 if (pcbddc->dbg_flag) { 6112 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6113 } 6114 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6115 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6116 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6117 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6118 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6119 } 6120 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6121 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6122 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6123 6124 /* assembling of global change of variable */ 6125 if (!pcbddc->fake_change) { 6126 Mat tmat; 6127 PetscInt bs; 6128 6129 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6130 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6131 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6132 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6133 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6134 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6135 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6136 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6137 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6138 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6139 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6140 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6141 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6142 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6143 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6144 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6145 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6146 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6147 6148 /* check */ 6149 if (pcbddc->dbg_flag) { 6150 PetscReal error; 6151 Vec x,x_change; 6152 6153 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6154 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6155 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6156 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6157 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6158 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6159 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6160 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6161 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6162 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6163 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6164 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6165 if (error > PETSC_SMALL) { 6166 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6167 } 6168 ierr = VecDestroy(&x);CHKERRQ(ierr); 6169 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6170 } 6171 /* adapt sub_schurs computed (if any) */ 6172 if (pcbddc->use_deluxe_scaling) { 6173 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6174 6175 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 6176 if (sub_schurs && sub_schurs->S_Ej_all) { 6177 Mat S_new,tmat; 6178 IS is_all_N,is_V_Sall = NULL; 6179 6180 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6181 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6182 if (pcbddc->deluxe_zerorows) { 6183 ISLocalToGlobalMapping NtoSall; 6184 IS is_V; 6185 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6186 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6187 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6188 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6189 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6190 } 6191 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6192 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6193 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6194 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6195 if (pcbddc->deluxe_zerorows) { 6196 const PetscScalar *array; 6197 const PetscInt *idxs_V,*idxs_all; 6198 PetscInt i,n_V; 6199 6200 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6201 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6202 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6203 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6204 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6205 for (i=0;i<n_V;i++) { 6206 PetscScalar val; 6207 PetscInt idx; 6208 6209 idx = idxs_V[i]; 6210 val = array[idxs_all[idxs_V[i]]]; 6211 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6212 } 6213 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6214 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6215 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6216 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6217 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6218 } 6219 sub_schurs->S_Ej_all = S_new; 6220 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6221 if (sub_schurs->sum_S_Ej_all) { 6222 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6223 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6224 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6225 if (pcbddc->deluxe_zerorows) { 6226 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6227 } 6228 sub_schurs->sum_S_Ej_all = S_new; 6229 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6230 } 6231 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6232 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6233 } 6234 /* destroy any change of basis context in sub_schurs */ 6235 if (sub_schurs && sub_schurs->change) { 6236 PetscInt i; 6237 6238 for (i=0;i<sub_schurs->n_subs;i++) { 6239 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6240 } 6241 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6242 } 6243 } 6244 if (pcbddc->switch_static) { /* need to save the local change */ 6245 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6246 } else { 6247 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6248 } 6249 /* determine if any process has changed the pressures locally */ 6250 pcbddc->change_interior = pcbddc->benign_have_null; 6251 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6252 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6253 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6254 pcbddc->use_qr_single = qr_needed; 6255 } 6256 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6257 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6258 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6259 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6260 } else { 6261 Mat benign_global = NULL; 6262 if (pcbddc->benign_have_null) { 6263 Mat tmat; 6264 6265 pcbddc->change_interior = PETSC_TRUE; 6266 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6267 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6268 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6269 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6270 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6271 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6272 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6273 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6274 if (pcbddc->benign_change) { 6275 Mat M; 6276 6277 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6278 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6279 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6280 ierr = MatDestroy(&M);CHKERRQ(ierr); 6281 } else { 6282 Mat eye; 6283 PetscScalar *array; 6284 6285 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6286 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6287 for (i=0;i<pcis->n;i++) { 6288 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6289 } 6290 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6291 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6292 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6293 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6294 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6295 } 6296 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6297 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6298 } 6299 if (pcbddc->user_ChangeOfBasisMatrix) { 6300 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6301 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6302 } else if (pcbddc->benign_have_null) { 6303 pcbddc->ChangeOfBasisMatrix = benign_global; 6304 } 6305 } 6306 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6307 IS is_global; 6308 const PetscInt *gidxs; 6309 6310 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6311 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6312 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6313 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6314 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6315 } 6316 } 6317 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6318 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6319 } 6320 6321 if (!pcbddc->fake_change) { 6322 /* add pressure dofs to set of primal nodes for numbering purposes */ 6323 for (i=0;i<pcbddc->benign_n;i++) { 6324 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6325 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6326 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6327 pcbddc->local_primal_size_cc++; 6328 pcbddc->local_primal_size++; 6329 } 6330 6331 /* check if a new primal space has been introduced (also take into account benign trick) */ 6332 pcbddc->new_primal_space_local = PETSC_TRUE; 6333 if (olocal_primal_size == pcbddc->local_primal_size) { 6334 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6335 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6336 if (!pcbddc->new_primal_space_local) { 6337 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6338 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6339 } 6340 } 6341 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6342 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6343 } 6344 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6345 6346 /* flush dbg viewer */ 6347 if (pcbddc->dbg_flag) { 6348 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6349 } 6350 6351 /* free workspace */ 6352 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6353 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6354 if (!pcbddc->adaptive_selection) { 6355 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6356 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6357 } else { 6358 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6359 pcbddc->adaptive_constraints_idxs_ptr, 6360 pcbddc->adaptive_constraints_data_ptr, 6361 pcbddc->adaptive_constraints_idxs, 6362 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6363 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6364 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6365 } 6366 PetscFunctionReturn(0); 6367 } 6368 6369 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6370 { 6371 ISLocalToGlobalMapping map; 6372 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6373 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6374 PetscInt i,N; 6375 PetscBool rcsr = PETSC_FALSE; 6376 PetscErrorCode ierr; 6377 6378 PetscFunctionBegin; 6379 if (pcbddc->recompute_topography) { 6380 pcbddc->graphanalyzed = PETSC_FALSE; 6381 /* Reset previously computed graph */ 6382 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6383 /* Init local Graph struct */ 6384 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6385 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6386 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6387 6388 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6389 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6390 } 6391 /* Check validity of the csr graph passed in by the user */ 6392 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6393 6394 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6395 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6396 PetscInt *xadj,*adjncy; 6397 PetscInt nvtxs; 6398 PetscBool flg_row=PETSC_FALSE; 6399 6400 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6401 if (flg_row) { 6402 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6403 pcbddc->computed_rowadj = PETSC_TRUE; 6404 } 6405 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6406 rcsr = PETSC_TRUE; 6407 } 6408 if (pcbddc->dbg_flag) { 6409 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6410 } 6411 6412 /* Setup of Graph */ 6413 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6414 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6415 6416 /* attach info on disconnected subdomains if present */ 6417 if (pcbddc->n_local_subs) { 6418 PetscInt *local_subs; 6419 6420 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6421 for (i=0;i<pcbddc->n_local_subs;i++) { 6422 const PetscInt *idxs; 6423 PetscInt nl,j; 6424 6425 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6426 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6427 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6428 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6429 } 6430 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6431 pcbddc->mat_graph->local_subs = local_subs; 6432 } 6433 } 6434 6435 if (!pcbddc->graphanalyzed) { 6436 /* Graph's connected components analysis */ 6437 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6438 pcbddc->graphanalyzed = PETSC_TRUE; 6439 } 6440 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6441 PetscFunctionReturn(0); 6442 } 6443 6444 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6445 { 6446 PetscInt i,j; 6447 PetscScalar *alphas; 6448 PetscErrorCode ierr; 6449 6450 PetscFunctionBegin; 6451 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6452 for (i=0;i<n;i++) { 6453 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6454 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6455 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6456 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6457 } 6458 ierr = PetscFree(alphas);CHKERRQ(ierr); 6459 PetscFunctionReturn(0); 6460 } 6461 6462 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6463 { 6464 Mat A; 6465 PetscInt n_neighs,*neighs,*n_shared,**shared; 6466 PetscMPIInt size,rank,color; 6467 PetscInt *xadj,*adjncy; 6468 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6469 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6470 PetscInt void_procs,*procs_candidates = NULL; 6471 PetscInt xadj_count,*count; 6472 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6473 PetscSubcomm psubcomm; 6474 MPI_Comm subcomm; 6475 PetscErrorCode ierr; 6476 6477 PetscFunctionBegin; 6478 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6479 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6480 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6481 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6482 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6483 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6484 6485 if (have_void) *have_void = PETSC_FALSE; 6486 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6487 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6488 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6489 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6490 im_active = !!n; 6491 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6492 void_procs = size - active_procs; 6493 /* get ranks of of non-active processes in mat communicator */ 6494 if (void_procs) { 6495 PetscInt ncand; 6496 6497 if (have_void) *have_void = PETSC_TRUE; 6498 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6499 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6500 for (i=0,ncand=0;i<size;i++) { 6501 if (!procs_candidates[i]) { 6502 procs_candidates[ncand++] = i; 6503 } 6504 } 6505 /* force n_subdomains to be not greater that the number of non-active processes */ 6506 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6507 } 6508 6509 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6510 number of subdomains requested 1 -> send to master or first candidate in voids */ 6511 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6512 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6513 PetscInt issize,isidx,dest; 6514 if (*n_subdomains == 1) dest = 0; 6515 else dest = rank; 6516 if (im_active) { 6517 issize = 1; 6518 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6519 isidx = procs_candidates[dest]; 6520 } else { 6521 isidx = dest; 6522 } 6523 } else { 6524 issize = 0; 6525 isidx = -1; 6526 } 6527 if (*n_subdomains != 1) *n_subdomains = active_procs; 6528 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6529 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6530 PetscFunctionReturn(0); 6531 } 6532 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6533 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6534 threshold = PetscMax(threshold,2); 6535 6536 /* Get info on mapping */ 6537 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6538 6539 /* build local CSR graph of subdomains' connectivity */ 6540 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6541 xadj[0] = 0; 6542 xadj[1] = PetscMax(n_neighs-1,0); 6543 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6544 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6545 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6546 for (i=1;i<n_neighs;i++) 6547 for (j=0;j<n_shared[i];j++) 6548 count[shared[i][j]] += 1; 6549 6550 xadj_count = 0; 6551 for (i=1;i<n_neighs;i++) { 6552 for (j=0;j<n_shared[i];j++) { 6553 if (count[shared[i][j]] < threshold) { 6554 adjncy[xadj_count] = neighs[i]; 6555 adjncy_wgt[xadj_count] = n_shared[i]; 6556 xadj_count++; 6557 break; 6558 } 6559 } 6560 } 6561 xadj[1] = xadj_count; 6562 ierr = PetscFree(count);CHKERRQ(ierr); 6563 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6564 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6565 6566 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6567 6568 /* Restrict work on active processes only */ 6569 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6570 if (void_procs) { 6571 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6572 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6573 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6574 subcomm = PetscSubcommChild(psubcomm); 6575 } else { 6576 psubcomm = NULL; 6577 subcomm = PetscObjectComm((PetscObject)mat); 6578 } 6579 6580 v_wgt = NULL; 6581 if (!color) { 6582 ierr = PetscFree(xadj);CHKERRQ(ierr); 6583 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6584 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6585 } else { 6586 Mat subdomain_adj; 6587 IS new_ranks,new_ranks_contig; 6588 MatPartitioning partitioner; 6589 PetscInt rstart=0,rend=0; 6590 PetscInt *is_indices,*oldranks; 6591 PetscMPIInt size; 6592 PetscBool aggregate; 6593 6594 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6595 if (void_procs) { 6596 PetscInt prank = rank; 6597 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6598 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6599 for (i=0;i<xadj[1];i++) { 6600 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6601 } 6602 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6603 } else { 6604 oldranks = NULL; 6605 } 6606 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6607 if (aggregate) { /* TODO: all this part could be made more efficient */ 6608 PetscInt lrows,row,ncols,*cols; 6609 PetscMPIInt nrank; 6610 PetscScalar *vals; 6611 6612 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6613 lrows = 0; 6614 if (nrank<redprocs) { 6615 lrows = size/redprocs; 6616 if (nrank<size%redprocs) lrows++; 6617 } 6618 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6619 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6620 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6621 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6622 row = nrank; 6623 ncols = xadj[1]-xadj[0]; 6624 cols = adjncy; 6625 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6626 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6627 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6628 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6629 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6630 ierr = PetscFree(xadj);CHKERRQ(ierr); 6631 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6632 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6633 ierr = PetscFree(vals);CHKERRQ(ierr); 6634 if (use_vwgt) { 6635 Vec v; 6636 const PetscScalar *array; 6637 PetscInt nl; 6638 6639 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6640 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6641 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6642 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6643 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6644 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6645 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6646 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6647 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6648 ierr = VecDestroy(&v);CHKERRQ(ierr); 6649 } 6650 } else { 6651 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6652 if (use_vwgt) { 6653 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6654 v_wgt[0] = n; 6655 } 6656 } 6657 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6658 6659 /* Partition */ 6660 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6661 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6662 if (v_wgt) { 6663 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6664 } 6665 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6666 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6667 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6668 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6669 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6670 6671 /* renumber new_ranks to avoid "holes" in new set of processors */ 6672 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6673 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6674 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6675 if (!aggregate) { 6676 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6677 #if defined(PETSC_USE_DEBUG) 6678 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6679 #endif 6680 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6681 } else if (oldranks) { 6682 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6683 } else { 6684 ranks_send_to_idx[0] = is_indices[0]; 6685 } 6686 } else { 6687 PetscInt idxs[1]; 6688 PetscMPIInt tag; 6689 MPI_Request *reqs; 6690 6691 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6692 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6693 for (i=rstart;i<rend;i++) { 6694 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6695 } 6696 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6697 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6698 ierr = PetscFree(reqs);CHKERRQ(ierr); 6699 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6700 #if defined(PETSC_USE_DEBUG) 6701 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6702 #endif 6703 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6704 } else if (oldranks) { 6705 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6706 } else { 6707 ranks_send_to_idx[0] = idxs[0]; 6708 } 6709 } 6710 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6711 /* clean up */ 6712 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6713 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6714 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6715 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6716 } 6717 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6718 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6719 6720 /* assemble parallel IS for sends */ 6721 i = 1; 6722 if (!color) i=0; 6723 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6724 PetscFunctionReturn(0); 6725 } 6726 6727 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6728 6729 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6730 { 6731 Mat local_mat; 6732 IS is_sends_internal; 6733 PetscInt rows,cols,new_local_rows; 6734 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6735 PetscBool ismatis,isdense,newisdense,destroy_mat; 6736 ISLocalToGlobalMapping l2gmap; 6737 PetscInt* l2gmap_indices; 6738 const PetscInt* is_indices; 6739 MatType new_local_type; 6740 /* buffers */ 6741 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6742 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6743 PetscInt *recv_buffer_idxs_local; 6744 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6745 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6746 /* MPI */ 6747 MPI_Comm comm,comm_n; 6748 PetscSubcomm subcomm; 6749 PetscMPIInt n_sends,n_recvs,commsize; 6750 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6751 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6752 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6753 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6754 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6755 PetscErrorCode ierr; 6756 6757 PetscFunctionBegin; 6758 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6759 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6760 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6761 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6762 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6763 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6764 PetscValidLogicalCollectiveBool(mat,reuse,6); 6765 PetscValidLogicalCollectiveInt(mat,nis,8); 6766 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6767 if (nvecs) { 6768 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6769 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6770 } 6771 /* further checks */ 6772 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6773 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6774 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6775 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6776 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6777 if (reuse && *mat_n) { 6778 PetscInt mrows,mcols,mnrows,mncols; 6779 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6780 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6781 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6782 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6783 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6784 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6785 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6786 } 6787 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6788 PetscValidLogicalCollectiveInt(mat,bs,0); 6789 6790 /* prepare IS for sending if not provided */ 6791 if (!is_sends) { 6792 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6793 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6794 } else { 6795 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6796 is_sends_internal = is_sends; 6797 } 6798 6799 /* get comm */ 6800 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6801 6802 /* compute number of sends */ 6803 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6804 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6805 6806 /* compute number of receives */ 6807 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6808 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6809 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6810 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6811 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6812 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6813 ierr = PetscFree(iflags);CHKERRQ(ierr); 6814 6815 /* restrict comm if requested */ 6816 subcomm = 0; 6817 destroy_mat = PETSC_FALSE; 6818 if (restrict_comm) { 6819 PetscMPIInt color,subcommsize; 6820 6821 color = 0; 6822 if (restrict_full) { 6823 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6824 } else { 6825 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6826 } 6827 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6828 subcommsize = commsize - subcommsize; 6829 /* check if reuse has been requested */ 6830 if (reuse) { 6831 if (*mat_n) { 6832 PetscMPIInt subcommsize2; 6833 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6834 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6835 comm_n = PetscObjectComm((PetscObject)*mat_n); 6836 } else { 6837 comm_n = PETSC_COMM_SELF; 6838 } 6839 } else { /* MAT_INITIAL_MATRIX */ 6840 PetscMPIInt rank; 6841 6842 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6843 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6844 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6845 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6846 comm_n = PetscSubcommChild(subcomm); 6847 } 6848 /* flag to destroy *mat_n if not significative */ 6849 if (color) destroy_mat = PETSC_TRUE; 6850 } else { 6851 comm_n = comm; 6852 } 6853 6854 /* prepare send/receive buffers */ 6855 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6856 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6857 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6858 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6859 if (nis) { 6860 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6861 } 6862 6863 /* Get data from local matrices */ 6864 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6865 /* TODO: See below some guidelines on how to prepare the local buffers */ 6866 /* 6867 send_buffer_vals should contain the raw values of the local matrix 6868 send_buffer_idxs should contain: 6869 - MatType_PRIVATE type 6870 - PetscInt size_of_l2gmap 6871 - PetscInt global_row_indices[size_of_l2gmap] 6872 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6873 */ 6874 else { 6875 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6876 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6877 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6878 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6879 send_buffer_idxs[1] = i; 6880 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6881 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6882 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6883 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6884 for (i=0;i<n_sends;i++) { 6885 ilengths_vals[is_indices[i]] = len*len; 6886 ilengths_idxs[is_indices[i]] = len+2; 6887 } 6888 } 6889 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6890 /* additional is (if any) */ 6891 if (nis) { 6892 PetscMPIInt psum; 6893 PetscInt j; 6894 for (j=0,psum=0;j<nis;j++) { 6895 PetscInt plen; 6896 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6897 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6898 psum += len+1; /* indices + lenght */ 6899 } 6900 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6901 for (j=0,psum=0;j<nis;j++) { 6902 PetscInt plen; 6903 const PetscInt *is_array_idxs; 6904 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6905 send_buffer_idxs_is[psum] = plen; 6906 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6907 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6908 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6909 psum += plen+1; /* indices + lenght */ 6910 } 6911 for (i=0;i<n_sends;i++) { 6912 ilengths_idxs_is[is_indices[i]] = psum; 6913 } 6914 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6915 } 6916 6917 buf_size_idxs = 0; 6918 buf_size_vals = 0; 6919 buf_size_idxs_is = 0; 6920 buf_size_vecs = 0; 6921 for (i=0;i<n_recvs;i++) { 6922 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6923 buf_size_vals += (PetscInt)olengths_vals[i]; 6924 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6925 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6926 } 6927 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6928 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6929 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6930 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6931 6932 /* get new tags for clean communications */ 6933 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6934 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6935 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6936 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6937 6938 /* allocate for requests */ 6939 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6940 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6941 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6942 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6943 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6944 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6945 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6946 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6947 6948 /* communications */ 6949 ptr_idxs = recv_buffer_idxs; 6950 ptr_vals = recv_buffer_vals; 6951 ptr_idxs_is = recv_buffer_idxs_is; 6952 ptr_vecs = recv_buffer_vecs; 6953 for (i=0;i<n_recvs;i++) { 6954 source_dest = onodes[i]; 6955 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6956 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6957 ptr_idxs += olengths_idxs[i]; 6958 ptr_vals += olengths_vals[i]; 6959 if (nis) { 6960 source_dest = onodes_is[i]; 6961 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6962 ptr_idxs_is += olengths_idxs_is[i]; 6963 } 6964 if (nvecs) { 6965 source_dest = onodes[i]; 6966 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6967 ptr_vecs += olengths_idxs[i]-2; 6968 } 6969 } 6970 for (i=0;i<n_sends;i++) { 6971 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6972 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6973 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6974 if (nis) { 6975 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6976 } 6977 if (nvecs) { 6978 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6979 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6980 } 6981 } 6982 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6983 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6984 6985 /* assemble new l2g map */ 6986 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6987 ptr_idxs = recv_buffer_idxs; 6988 new_local_rows = 0; 6989 for (i=0;i<n_recvs;i++) { 6990 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6991 ptr_idxs += olengths_idxs[i]; 6992 } 6993 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6994 ptr_idxs = recv_buffer_idxs; 6995 new_local_rows = 0; 6996 for (i=0;i<n_recvs;i++) { 6997 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6998 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6999 ptr_idxs += olengths_idxs[i]; 7000 } 7001 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7002 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7003 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7004 7005 /* infer new local matrix type from received local matrices type */ 7006 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7007 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7008 if (n_recvs) { 7009 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7010 ptr_idxs = recv_buffer_idxs; 7011 for (i=0;i<n_recvs;i++) { 7012 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7013 new_local_type_private = MATAIJ_PRIVATE; 7014 break; 7015 } 7016 ptr_idxs += olengths_idxs[i]; 7017 } 7018 switch (new_local_type_private) { 7019 case MATDENSE_PRIVATE: 7020 new_local_type = MATSEQAIJ; 7021 bs = 1; 7022 break; 7023 case MATAIJ_PRIVATE: 7024 new_local_type = MATSEQAIJ; 7025 bs = 1; 7026 break; 7027 case MATBAIJ_PRIVATE: 7028 new_local_type = MATSEQBAIJ; 7029 break; 7030 case MATSBAIJ_PRIVATE: 7031 new_local_type = MATSEQSBAIJ; 7032 break; 7033 default: 7034 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7035 break; 7036 } 7037 } else { /* by default, new_local_type is seqaij */ 7038 new_local_type = MATSEQAIJ; 7039 bs = 1; 7040 } 7041 7042 /* create MATIS object if needed */ 7043 if (!reuse) { 7044 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7045 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7046 } else { 7047 /* it also destroys the local matrices */ 7048 if (*mat_n) { 7049 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7050 } else { /* this is a fake object */ 7051 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7052 } 7053 } 7054 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7055 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7056 7057 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7058 7059 /* Global to local map of received indices */ 7060 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7061 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7062 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7063 7064 /* restore attributes -> type of incoming data and its size */ 7065 buf_size_idxs = 0; 7066 for (i=0;i<n_recvs;i++) { 7067 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7068 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7069 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7070 } 7071 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7072 7073 /* set preallocation */ 7074 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7075 if (!newisdense) { 7076 PetscInt *new_local_nnz=0; 7077 7078 ptr_idxs = recv_buffer_idxs_local; 7079 if (n_recvs) { 7080 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7081 } 7082 for (i=0;i<n_recvs;i++) { 7083 PetscInt j; 7084 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7085 for (j=0;j<*(ptr_idxs+1);j++) { 7086 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7087 } 7088 } else { 7089 /* TODO */ 7090 } 7091 ptr_idxs += olengths_idxs[i]; 7092 } 7093 if (new_local_nnz) { 7094 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7095 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7096 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7097 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7098 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7099 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7100 } else { 7101 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7102 } 7103 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7104 } else { 7105 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7106 } 7107 7108 /* set values */ 7109 ptr_vals = recv_buffer_vals; 7110 ptr_idxs = recv_buffer_idxs_local; 7111 for (i=0;i<n_recvs;i++) { 7112 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7113 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7114 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7115 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7116 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7117 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7118 } else { 7119 /* TODO */ 7120 } 7121 ptr_idxs += olengths_idxs[i]; 7122 ptr_vals += olengths_vals[i]; 7123 } 7124 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7125 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7126 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7127 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7128 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7129 7130 #if 0 7131 if (!restrict_comm) { /* check */ 7132 Vec lvec,rvec; 7133 PetscReal infty_error; 7134 7135 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7136 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7137 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7138 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7139 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7140 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7141 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7142 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7143 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7144 } 7145 #endif 7146 7147 /* assemble new additional is (if any) */ 7148 if (nis) { 7149 PetscInt **temp_idxs,*count_is,j,psum; 7150 7151 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7152 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7153 ptr_idxs = recv_buffer_idxs_is; 7154 psum = 0; 7155 for (i=0;i<n_recvs;i++) { 7156 for (j=0;j<nis;j++) { 7157 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7158 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7159 psum += plen; 7160 ptr_idxs += plen+1; /* shift pointer to received data */ 7161 } 7162 } 7163 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7164 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7165 for (i=1;i<nis;i++) { 7166 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7167 } 7168 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7169 ptr_idxs = recv_buffer_idxs_is; 7170 for (i=0;i<n_recvs;i++) { 7171 for (j=0;j<nis;j++) { 7172 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7173 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7174 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7175 ptr_idxs += plen+1; /* shift pointer to received data */ 7176 } 7177 } 7178 for (i=0;i<nis;i++) { 7179 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7180 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7181 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7182 } 7183 ierr = PetscFree(count_is);CHKERRQ(ierr); 7184 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7185 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7186 } 7187 /* free workspace */ 7188 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7189 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7190 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7191 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7192 if (isdense) { 7193 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7194 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7195 } else { 7196 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7197 } 7198 if (nis) { 7199 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7200 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7201 } 7202 7203 if (nvecs) { 7204 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7205 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7206 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7207 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7208 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7209 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7210 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7211 /* set values */ 7212 ptr_vals = recv_buffer_vecs; 7213 ptr_idxs = recv_buffer_idxs_local; 7214 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7215 for (i=0;i<n_recvs;i++) { 7216 PetscInt j; 7217 for (j=0;j<*(ptr_idxs+1);j++) { 7218 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7219 } 7220 ptr_idxs += olengths_idxs[i]; 7221 ptr_vals += olengths_idxs[i]-2; 7222 } 7223 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7224 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7225 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7226 } 7227 7228 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7229 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7230 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7231 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7232 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7233 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7234 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7235 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7236 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7237 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7238 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7239 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7240 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7241 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7242 ierr = PetscFree(onodes);CHKERRQ(ierr); 7243 if (nis) { 7244 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7245 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7246 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7247 } 7248 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7249 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7250 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7251 for (i=0;i<nis;i++) { 7252 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7253 } 7254 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7255 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7256 } 7257 *mat_n = NULL; 7258 } 7259 PetscFunctionReturn(0); 7260 } 7261 7262 /* temporary hack into ksp private data structure */ 7263 #include <petsc/private/kspimpl.h> 7264 7265 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7266 { 7267 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7268 PC_IS *pcis = (PC_IS*)pc->data; 7269 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7270 Mat coarsedivudotp = NULL; 7271 Mat coarseG,t_coarse_mat_is; 7272 MatNullSpace CoarseNullSpace = NULL; 7273 ISLocalToGlobalMapping coarse_islg; 7274 IS coarse_is,*isarray; 7275 PetscInt i,im_active=-1,active_procs=-1; 7276 PetscInt nis,nisdofs,nisneu,nisvert; 7277 PC pc_temp; 7278 PCType coarse_pc_type; 7279 KSPType coarse_ksp_type; 7280 PetscBool multilevel_requested,multilevel_allowed; 7281 PetscBool isredundant,isbddc,isnn,coarse_reuse; 7282 PetscInt ncoarse,nedcfield; 7283 PetscBool compute_vecs = PETSC_FALSE; 7284 PetscScalar *array; 7285 MatReuse coarse_mat_reuse; 7286 PetscBool restr, full_restr, have_void; 7287 PetscMPIInt commsize; 7288 PetscErrorCode ierr; 7289 7290 PetscFunctionBegin; 7291 /* Assign global numbering to coarse dofs */ 7292 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7293 PetscInt ocoarse_size; 7294 compute_vecs = PETSC_TRUE; 7295 7296 pcbddc->new_primal_space = PETSC_TRUE; 7297 ocoarse_size = pcbddc->coarse_size; 7298 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7299 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7300 /* see if we can avoid some work */ 7301 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7302 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7303 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7304 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7305 coarse_reuse = PETSC_FALSE; 7306 } else { /* we can safely reuse already computed coarse matrix */ 7307 coarse_reuse = PETSC_TRUE; 7308 } 7309 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7310 coarse_reuse = PETSC_FALSE; 7311 } 7312 /* reset any subassembling information */ 7313 if (!coarse_reuse || pcbddc->recompute_topography) { 7314 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7315 } 7316 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7317 coarse_reuse = PETSC_TRUE; 7318 } 7319 /* assemble coarse matrix */ 7320 if (coarse_reuse && pcbddc->coarse_ksp) { 7321 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7322 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7323 coarse_mat_reuse = MAT_REUSE_MATRIX; 7324 } else { 7325 coarse_mat = NULL; 7326 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7327 } 7328 7329 /* creates temporary l2gmap and IS for coarse indexes */ 7330 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7331 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7332 7333 /* creates temporary MATIS object for coarse matrix */ 7334 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7335 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7336 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7337 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7338 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7339 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7340 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7341 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7342 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7343 7344 /* count "active" (i.e. with positive local size) and "void" processes */ 7345 im_active = !!(pcis->n); 7346 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7347 7348 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7349 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7350 /* full_restr : just use the receivers from the subassembling pattern */ 7351 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7352 coarse_mat_is = NULL; 7353 multilevel_allowed = PETSC_FALSE; 7354 multilevel_requested = PETSC_FALSE; 7355 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7356 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7357 if (multilevel_requested) { 7358 ncoarse = active_procs/pcbddc->coarsening_ratio; 7359 restr = PETSC_FALSE; 7360 full_restr = PETSC_FALSE; 7361 } else { 7362 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7363 restr = PETSC_TRUE; 7364 full_restr = PETSC_TRUE; 7365 } 7366 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7367 ncoarse = PetscMax(1,ncoarse); 7368 if (!pcbddc->coarse_subassembling) { 7369 if (pcbddc->coarsening_ratio > 1) { 7370 if (multilevel_requested) { 7371 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7372 } else { 7373 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7374 } 7375 } else { 7376 PetscMPIInt rank; 7377 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7378 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7379 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7380 } 7381 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7382 PetscInt psum; 7383 if (pcbddc->coarse_ksp) psum = 1; 7384 else psum = 0; 7385 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7386 if (ncoarse < commsize) have_void = PETSC_TRUE; 7387 } 7388 /* determine if we can go multilevel */ 7389 if (multilevel_requested) { 7390 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7391 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7392 } 7393 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7394 7395 /* dump subassembling pattern */ 7396 if (pcbddc->dbg_flag && multilevel_allowed) { 7397 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7398 } 7399 7400 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7401 nedcfield = -1; 7402 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7403 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7404 const PetscInt *idxs; 7405 ISLocalToGlobalMapping tmap; 7406 7407 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7408 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7409 /* allocate space for temporary storage */ 7410 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7411 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7412 /* allocate for IS array */ 7413 nisdofs = pcbddc->n_ISForDofsLocal; 7414 if (pcbddc->nedclocal) { 7415 if (pcbddc->nedfield > -1) { 7416 nedcfield = pcbddc->nedfield; 7417 } else { 7418 nedcfield = 0; 7419 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7420 nisdofs = 1; 7421 } 7422 } 7423 nisneu = !!pcbddc->NeumannBoundariesLocal; 7424 nisvert = 0; /* nisvert is not used */ 7425 nis = nisdofs + nisneu + nisvert; 7426 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7427 /* dofs splitting */ 7428 for (i=0;i<nisdofs;i++) { 7429 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7430 if (nedcfield != i) { 7431 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7432 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7433 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7434 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7435 } else { 7436 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7437 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7438 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7439 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7440 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7441 } 7442 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7443 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7444 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7445 } 7446 /* neumann boundaries */ 7447 if (pcbddc->NeumannBoundariesLocal) { 7448 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7449 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7450 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7451 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7452 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7453 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7454 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7455 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7456 } 7457 /* free memory */ 7458 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7459 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7460 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7461 } else { 7462 nis = 0; 7463 nisdofs = 0; 7464 nisneu = 0; 7465 nisvert = 0; 7466 isarray = NULL; 7467 } 7468 /* destroy no longer needed map */ 7469 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7470 7471 /* subassemble */ 7472 if (multilevel_allowed) { 7473 Vec vp[1]; 7474 PetscInt nvecs = 0; 7475 PetscBool reuse,reuser; 7476 7477 if (coarse_mat) reuse = PETSC_TRUE; 7478 else reuse = PETSC_FALSE; 7479 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7480 vp[0] = NULL; 7481 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7482 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7483 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7484 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7485 nvecs = 1; 7486 7487 if (pcbddc->divudotp) { 7488 Mat B,loc_divudotp; 7489 Vec v,p; 7490 IS dummy; 7491 PetscInt np; 7492 7493 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7494 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7495 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7496 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7497 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7498 ierr = VecSet(p,1.);CHKERRQ(ierr); 7499 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7500 ierr = VecDestroy(&p);CHKERRQ(ierr); 7501 ierr = MatDestroy(&B);CHKERRQ(ierr); 7502 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7503 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7504 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7505 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7506 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7507 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7508 ierr = VecDestroy(&v);CHKERRQ(ierr); 7509 } 7510 } 7511 if (reuser) { 7512 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7513 } else { 7514 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7515 } 7516 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7517 PetscScalar *arraym,*arrayv; 7518 PetscInt nl; 7519 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7520 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7521 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7522 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7523 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7524 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7525 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7526 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7527 } else { 7528 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7529 } 7530 } else { 7531 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7532 } 7533 if (coarse_mat_is || coarse_mat) { 7534 PetscMPIInt size; 7535 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7536 if (!multilevel_allowed) { 7537 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7538 } else { 7539 Mat A; 7540 7541 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7542 if (coarse_mat_is) { 7543 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7544 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7545 coarse_mat = coarse_mat_is; 7546 } 7547 /* be sure we don't have MatSeqDENSE as local mat */ 7548 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7549 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7550 } 7551 } 7552 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7553 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7554 7555 /* create local to global scatters for coarse problem */ 7556 if (compute_vecs) { 7557 PetscInt lrows; 7558 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7559 if (coarse_mat) { 7560 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7561 } else { 7562 lrows = 0; 7563 } 7564 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7565 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7566 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7567 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7568 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7569 } 7570 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7571 7572 /* set defaults for coarse KSP and PC */ 7573 if (multilevel_allowed) { 7574 coarse_ksp_type = KSPRICHARDSON; 7575 coarse_pc_type = PCBDDC; 7576 } else { 7577 coarse_ksp_type = KSPPREONLY; 7578 coarse_pc_type = PCREDUNDANT; 7579 } 7580 7581 /* print some info if requested */ 7582 if (pcbddc->dbg_flag) { 7583 if (!multilevel_allowed) { 7584 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7585 if (multilevel_requested) { 7586 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7587 } else if (pcbddc->max_levels) { 7588 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7589 } 7590 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7591 } 7592 } 7593 7594 /* communicate coarse discrete gradient */ 7595 coarseG = NULL; 7596 if (pcbddc->nedcG && multilevel_allowed) { 7597 MPI_Comm ccomm; 7598 if (coarse_mat) { 7599 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7600 } else { 7601 ccomm = MPI_COMM_NULL; 7602 } 7603 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7604 } 7605 7606 /* create the coarse KSP object only once with defaults */ 7607 if (coarse_mat) { 7608 PetscViewer dbg_viewer = NULL; 7609 if (pcbddc->dbg_flag) { 7610 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7611 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7612 } 7613 if (!pcbddc->coarse_ksp) { 7614 char prefix[256],str_level[16]; 7615 size_t len; 7616 7617 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7618 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7619 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7620 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7621 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7622 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7623 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7624 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7625 /* TODO is this logic correct? should check for coarse_mat type */ 7626 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7627 /* prefix */ 7628 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7629 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7630 if (!pcbddc->current_level) { 7631 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7632 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7633 } else { 7634 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7635 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7636 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7637 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7638 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7639 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7640 } 7641 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7642 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7643 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7644 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7645 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7646 /* allow user customization */ 7647 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7648 } 7649 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7650 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7651 if (nisdofs) { 7652 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7653 for (i=0;i<nisdofs;i++) { 7654 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7655 } 7656 } 7657 if (nisneu) { 7658 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7659 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7660 } 7661 if (nisvert) { 7662 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7663 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7664 } 7665 if (coarseG) { 7666 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7667 } 7668 7669 /* get some info after set from options */ 7670 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7671 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7672 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7673 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 7674 if (isbddc && !multilevel_allowed) { 7675 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7676 isbddc = PETSC_FALSE; 7677 } 7678 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 7679 if (multilevel_requested && !isbddc && !isnn) { 7680 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 7681 isbddc = PETSC_TRUE; 7682 isnn = PETSC_FALSE; 7683 } 7684 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7685 if (isredundant) { 7686 KSP inner_ksp; 7687 PC inner_pc; 7688 7689 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7690 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7691 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7692 } 7693 7694 /* parameters which miss an API */ 7695 if (isbddc) { 7696 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7697 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7698 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7699 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7700 if (pcbddc_coarse->benign_saddle_point) { 7701 Mat coarsedivudotp_is; 7702 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7703 IS row,col; 7704 const PetscInt *gidxs; 7705 PetscInt n,st,M,N; 7706 7707 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7708 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7709 st = st-n; 7710 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7711 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7712 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7713 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7714 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7715 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7716 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7717 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7718 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7719 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7720 ierr = ISDestroy(&row);CHKERRQ(ierr); 7721 ierr = ISDestroy(&col);CHKERRQ(ierr); 7722 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7723 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7724 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7725 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7726 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7727 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7728 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7729 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7730 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7731 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7732 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7733 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7734 } 7735 } 7736 7737 /* propagate symmetry info of coarse matrix */ 7738 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7739 if (pc->pmat->symmetric_set) { 7740 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7741 } 7742 if (pc->pmat->hermitian_set) { 7743 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7744 } 7745 if (pc->pmat->spd_set) { 7746 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7747 } 7748 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7749 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7750 } 7751 /* set operators */ 7752 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7753 if (pcbddc->dbg_flag) { 7754 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7755 } 7756 } 7757 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 7758 ierr = PetscFree(isarray);CHKERRQ(ierr); 7759 #if 0 7760 { 7761 PetscViewer viewer; 7762 char filename[256]; 7763 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7764 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7765 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7766 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7767 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7768 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7769 } 7770 #endif 7771 7772 if (pcbddc->coarse_ksp) { 7773 Vec crhs,csol; 7774 7775 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7776 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7777 if (!csol) { 7778 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7779 } 7780 if (!crhs) { 7781 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7782 } 7783 } 7784 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7785 7786 /* compute null space for coarse solver if the benign trick has been requested */ 7787 if (pcbddc->benign_null) { 7788 7789 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7790 for (i=0;i<pcbddc->benign_n;i++) { 7791 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7792 } 7793 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7794 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7795 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7796 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7797 if (coarse_mat) { 7798 Vec nullv; 7799 PetscScalar *array,*array2; 7800 PetscInt nl; 7801 7802 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7803 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7804 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7805 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7806 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7807 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7808 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7809 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7810 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7811 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7812 } 7813 } 7814 7815 if (pcbddc->coarse_ksp) { 7816 PetscBool ispreonly; 7817 7818 if (CoarseNullSpace) { 7819 PetscBool isnull; 7820 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7821 if (isnull) { 7822 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7823 } 7824 /* TODO: add local nullspaces (if any) */ 7825 } 7826 /* setup coarse ksp */ 7827 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7828 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7829 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7830 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7831 KSP check_ksp; 7832 KSPType check_ksp_type; 7833 PC check_pc; 7834 Vec check_vec,coarse_vec; 7835 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7836 PetscInt its; 7837 PetscBool compute_eigs; 7838 PetscReal *eigs_r,*eigs_c; 7839 PetscInt neigs; 7840 const char *prefix; 7841 7842 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7843 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7844 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7845 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7846 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7847 /* prevent from setup unneeded object */ 7848 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7849 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7850 if (ispreonly) { 7851 check_ksp_type = KSPPREONLY; 7852 compute_eigs = PETSC_FALSE; 7853 } else { 7854 check_ksp_type = KSPGMRES; 7855 compute_eigs = PETSC_TRUE; 7856 } 7857 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7858 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7859 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7860 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7861 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7862 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7863 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7864 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7865 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7866 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7867 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7868 /* create random vec */ 7869 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7870 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7871 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7872 /* solve coarse problem */ 7873 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7874 /* set eigenvalue estimation if preonly has not been requested */ 7875 if (compute_eigs) { 7876 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7877 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7878 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7879 if (neigs) { 7880 lambda_max = eigs_r[neigs-1]; 7881 lambda_min = eigs_r[0]; 7882 if (pcbddc->use_coarse_estimates) { 7883 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7884 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7885 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7886 } 7887 } 7888 } 7889 } 7890 7891 /* check coarse problem residual error */ 7892 if (pcbddc->dbg_flag) { 7893 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7894 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7895 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7896 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7897 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7898 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7899 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7900 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7901 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7902 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7903 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7904 if (CoarseNullSpace) { 7905 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7906 } 7907 if (compute_eigs) { 7908 PetscReal lambda_max_s,lambda_min_s; 7909 KSPConvergedReason reason; 7910 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7911 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7912 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 7913 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7914 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7915 for (i=0;i<neigs;i++) { 7916 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7917 } 7918 } 7919 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7920 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7921 } 7922 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7923 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7924 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7925 if (compute_eigs) { 7926 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7927 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7928 } 7929 } 7930 } 7931 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7932 /* print additional info */ 7933 if (pcbddc->dbg_flag) { 7934 /* waits until all processes reaches this point */ 7935 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7936 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7937 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7938 } 7939 7940 /* free memory */ 7941 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7942 PetscFunctionReturn(0); 7943 } 7944 7945 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7946 { 7947 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7948 PC_IS* pcis = (PC_IS*)pc->data; 7949 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7950 IS subset,subset_mult,subset_n; 7951 PetscInt local_size,coarse_size=0; 7952 PetscInt *local_primal_indices=NULL; 7953 const PetscInt *t_local_primal_indices; 7954 PetscErrorCode ierr; 7955 7956 PetscFunctionBegin; 7957 /* Compute global number of coarse dofs */ 7958 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7959 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7960 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7961 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7962 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7963 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7964 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7965 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7966 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7967 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7968 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7969 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7970 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7971 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7972 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7973 7974 /* check numbering */ 7975 if (pcbddc->dbg_flag) { 7976 PetscScalar coarsesum,*array,*array2; 7977 PetscInt i; 7978 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7979 7980 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7981 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7982 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7983 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7984 /* counter */ 7985 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7986 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7987 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7988 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7989 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7990 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7991 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7992 for (i=0;i<pcbddc->local_primal_size;i++) { 7993 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7994 } 7995 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7996 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7997 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7998 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7999 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8000 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8001 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8002 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8003 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8004 for (i=0;i<pcis->n;i++) { 8005 if (array[i] != 0.0 && array[i] != array2[i]) { 8006 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8007 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8008 set_error = PETSC_TRUE; 8009 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8010 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8011 } 8012 } 8013 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8014 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8015 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8016 for (i=0;i<pcis->n;i++) { 8017 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8018 } 8019 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8020 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8021 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8022 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8023 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8024 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8025 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8026 PetscInt *gidxs; 8027 8028 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8029 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8030 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8031 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8032 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8033 for (i=0;i<pcbddc->local_primal_size;i++) { 8034 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8035 } 8036 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8037 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8038 } 8039 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8040 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8041 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8042 } 8043 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8044 /* get back data */ 8045 *coarse_size_n = coarse_size; 8046 *local_primal_indices_n = local_primal_indices; 8047 PetscFunctionReturn(0); 8048 } 8049 8050 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8051 { 8052 IS localis_t; 8053 PetscInt i,lsize,*idxs,n; 8054 PetscScalar *vals; 8055 PetscErrorCode ierr; 8056 8057 PetscFunctionBegin; 8058 /* get indices in local ordering exploiting local to global map */ 8059 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8060 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8061 for (i=0;i<lsize;i++) vals[i] = 1.0; 8062 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8063 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8064 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8065 if (idxs) { /* multilevel guard */ 8066 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8067 } 8068 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8069 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8070 ierr = PetscFree(vals);CHKERRQ(ierr); 8071 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8072 /* now compute set in local ordering */ 8073 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8074 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8075 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8076 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8077 for (i=0,lsize=0;i<n;i++) { 8078 if (PetscRealPart(vals[i]) > 0.5) { 8079 lsize++; 8080 } 8081 } 8082 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8083 for (i=0,lsize=0;i<n;i++) { 8084 if (PetscRealPart(vals[i]) > 0.5) { 8085 idxs[lsize++] = i; 8086 } 8087 } 8088 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8089 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8090 *localis = localis_t; 8091 PetscFunctionReturn(0); 8092 } 8093 8094 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8095 { 8096 PC_IS *pcis=(PC_IS*)pc->data; 8097 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8098 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8099 Mat S_j; 8100 PetscInt *used_xadj,*used_adjncy; 8101 PetscBool free_used_adj; 8102 PetscErrorCode ierr; 8103 8104 PetscFunctionBegin; 8105 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8106 free_used_adj = PETSC_FALSE; 8107 if (pcbddc->sub_schurs_layers == -1) { 8108 used_xadj = NULL; 8109 used_adjncy = NULL; 8110 } else { 8111 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8112 used_xadj = pcbddc->mat_graph->xadj; 8113 used_adjncy = pcbddc->mat_graph->adjncy; 8114 } else if (pcbddc->computed_rowadj) { 8115 used_xadj = pcbddc->mat_graph->xadj; 8116 used_adjncy = pcbddc->mat_graph->adjncy; 8117 } else { 8118 PetscBool flg_row=PETSC_FALSE; 8119 const PetscInt *xadj,*adjncy; 8120 PetscInt nvtxs; 8121 8122 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8123 if (flg_row) { 8124 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8125 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8126 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8127 free_used_adj = PETSC_TRUE; 8128 } else { 8129 pcbddc->sub_schurs_layers = -1; 8130 used_xadj = NULL; 8131 used_adjncy = NULL; 8132 } 8133 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8134 } 8135 } 8136 8137 /* setup sub_schurs data */ 8138 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8139 if (!sub_schurs->schur_explicit) { 8140 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8141 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8142 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8143 } else { 8144 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8145 PetscBool isseqaij,need_change = PETSC_FALSE; 8146 PetscInt benign_n; 8147 Mat change = NULL; 8148 Vec scaling = NULL; 8149 IS change_primal = NULL; 8150 8151 if (!pcbddc->use_vertices && reuse_solvers) { 8152 PetscInt n_vertices; 8153 8154 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8155 reuse_solvers = (PetscBool)!n_vertices; 8156 } 8157 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8158 if (!isseqaij) { 8159 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8160 if (matis->A == pcbddc->local_mat) { 8161 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8162 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8163 } else { 8164 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8165 } 8166 } 8167 if (!pcbddc->benign_change_explicit) { 8168 benign_n = pcbddc->benign_n; 8169 } else { 8170 benign_n = 0; 8171 } 8172 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8173 We need a global reduction to avoid possible deadlocks. 8174 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8175 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8176 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8177 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8178 need_change = (PetscBool)(!need_change); 8179 } 8180 /* If the user defines additional constraints, we import them here. 8181 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8182 if (need_change) { 8183 PC_IS *pcisf; 8184 PC_BDDC *pcbddcf; 8185 PC pcf; 8186 8187 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8188 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8189 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8190 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8191 8192 /* hacks */ 8193 pcisf = (PC_IS*)pcf->data; 8194 pcisf->is_B_local = pcis->is_B_local; 8195 pcisf->vec1_N = pcis->vec1_N; 8196 pcisf->BtoNmap = pcis->BtoNmap; 8197 pcisf->n = pcis->n; 8198 pcisf->n_B = pcis->n_B; 8199 pcbddcf = (PC_BDDC*)pcf->data; 8200 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8201 pcbddcf->mat_graph = pcbddc->mat_graph; 8202 pcbddcf->use_faces = PETSC_TRUE; 8203 pcbddcf->use_change_of_basis = PETSC_TRUE; 8204 pcbddcf->use_change_on_faces = PETSC_TRUE; 8205 pcbddcf->use_qr_single = PETSC_TRUE; 8206 pcbddcf->fake_change = PETSC_TRUE; 8207 8208 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8209 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8210 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8211 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8212 change = pcbddcf->ConstraintMatrix; 8213 pcbddcf->ConstraintMatrix = NULL; 8214 8215 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8216 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8217 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8218 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8219 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8220 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8221 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8222 pcf->ops->destroy = NULL; 8223 pcf->ops->reset = NULL; 8224 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8225 } 8226 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8227 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8228 ierr = MatDestroy(&change);CHKERRQ(ierr); 8229 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8230 } 8231 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8232 8233 /* free adjacency */ 8234 if (free_used_adj) { 8235 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8236 } 8237 PetscFunctionReturn(0); 8238 } 8239 8240 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8241 { 8242 PC_IS *pcis=(PC_IS*)pc->data; 8243 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8244 PCBDDCGraph graph; 8245 PetscErrorCode ierr; 8246 8247 PetscFunctionBegin; 8248 /* attach interface graph for determining subsets */ 8249 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8250 IS verticesIS,verticescomm; 8251 PetscInt vsize,*idxs; 8252 8253 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8254 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8255 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8256 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8257 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8258 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8259 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8260 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8261 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8262 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8263 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8264 } else { 8265 graph = pcbddc->mat_graph; 8266 } 8267 /* print some info */ 8268 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8269 IS vertices; 8270 PetscInt nv,nedges,nfaces; 8271 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8272 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8273 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8274 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8276 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8278 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8279 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8280 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8281 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8282 } 8283 8284 /* sub_schurs init */ 8285 if (!pcbddc->sub_schurs) { 8286 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8287 } 8288 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8289 pcbddc->sub_schurs->prefix = ((PetscObject)pc)->prefix; 8290 8291 /* free graph struct */ 8292 if (pcbddc->sub_schurs_rebuild) { 8293 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8294 } 8295 PetscFunctionReturn(0); 8296 } 8297 8298 PetscErrorCode PCBDDCCheckOperator(PC pc) 8299 { 8300 PC_IS *pcis=(PC_IS*)pc->data; 8301 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8302 PetscErrorCode ierr; 8303 8304 PetscFunctionBegin; 8305 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8306 IS zerodiag = NULL; 8307 Mat S_j,B0_B=NULL; 8308 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8309 PetscScalar *p0_check,*array,*array2; 8310 PetscReal norm; 8311 PetscInt i; 8312 8313 /* B0 and B0_B */ 8314 if (zerodiag) { 8315 IS dummy; 8316 8317 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8318 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8319 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8320 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8321 } 8322 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8323 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8324 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8325 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8326 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8327 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8328 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8329 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8330 /* S_j */ 8331 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8332 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8333 8334 /* mimic vector in \widetilde{W}_\Gamma */ 8335 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8336 /* continuous in primal space */ 8337 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8338 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8339 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8340 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8341 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8342 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8343 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8344 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8345 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8346 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8347 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8348 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8349 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8350 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8351 8352 /* assemble rhs for coarse problem */ 8353 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8354 /* local with Schur */ 8355 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8356 if (zerodiag) { 8357 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8358 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8359 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8360 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8361 } 8362 /* sum on primal nodes the local contributions */ 8363 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8364 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8365 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8366 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8367 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8368 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8369 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8370 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8371 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8372 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8373 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8374 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8375 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8376 /* scale primal nodes (BDDC sums contibutions) */ 8377 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8378 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8379 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8380 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8381 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8382 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8383 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8384 /* global: \widetilde{B0}_B w_\Gamma */ 8385 if (zerodiag) { 8386 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8387 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8388 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8389 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8390 } 8391 /* BDDC */ 8392 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8393 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8394 8395 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8396 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8397 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8398 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8399 for (i=0;i<pcbddc->benign_n;i++) { 8400 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8401 } 8402 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8403 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8404 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8405 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8406 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8407 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8408 } 8409 PetscFunctionReturn(0); 8410 } 8411 8412 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8413 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8414 { 8415 Mat At; 8416 IS rows; 8417 PetscInt rst,ren; 8418 PetscErrorCode ierr; 8419 PetscLayout rmap; 8420 8421 PetscFunctionBegin; 8422 rst = ren = 0; 8423 if (ccomm != MPI_COMM_NULL) { 8424 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8425 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8426 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8427 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8428 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8429 } 8430 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8431 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8432 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8433 8434 if (ccomm != MPI_COMM_NULL) { 8435 Mat_MPIAIJ *a,*b; 8436 IS from,to; 8437 Vec gvec; 8438 PetscInt lsize; 8439 8440 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8441 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8442 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8443 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8444 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8445 a = (Mat_MPIAIJ*)At->data; 8446 b = (Mat_MPIAIJ*)(*B)->data; 8447 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8448 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8449 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8450 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8451 b->A = a->A; 8452 b->B = a->B; 8453 8454 b->donotstash = a->donotstash; 8455 b->roworiented = a->roworiented; 8456 b->rowindices = 0; 8457 b->rowvalues = 0; 8458 b->getrowactive = PETSC_FALSE; 8459 8460 (*B)->rmap = rmap; 8461 (*B)->factortype = A->factortype; 8462 (*B)->assembled = PETSC_TRUE; 8463 (*B)->insertmode = NOT_SET_VALUES; 8464 (*B)->preallocated = PETSC_TRUE; 8465 8466 if (a->colmap) { 8467 #if defined(PETSC_USE_CTABLE) 8468 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8469 #else 8470 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8471 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8472 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8473 #endif 8474 } else b->colmap = 0; 8475 if (a->garray) { 8476 PetscInt len; 8477 len = a->B->cmap->n; 8478 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8479 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8480 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8481 } else b->garray = 0; 8482 8483 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8484 b->lvec = a->lvec; 8485 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8486 8487 /* cannot use VecScatterCopy */ 8488 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8489 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8490 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8491 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8492 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8493 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8494 ierr = ISDestroy(&from);CHKERRQ(ierr); 8495 ierr = ISDestroy(&to);CHKERRQ(ierr); 8496 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8497 } 8498 ierr = MatDestroy(&At);CHKERRQ(ierr); 8499 PetscFunctionReturn(0); 8500 } 8501