1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 8 9 /* if range is true, it returns B s.t. span{B} = range(A) 10 if range is false, it returns B s.t. range(B) _|_ range(A) */ 11 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 12 { 13 #if !defined(PETSC_USE_COMPLEX) 14 PetscScalar *uwork,*data,*U, ds = 0.; 15 PetscReal *sing; 16 PetscBLASInt bM,bN,lwork,lierr,di = 1; 17 PetscInt ulw,i,nr,nc,n; 18 PetscErrorCode ierr; 19 20 PetscFunctionBegin; 21 #if defined(PETSC_MISSING_LAPACK_GESVD) 22 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 23 #else 24 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 25 if (!nr || !nc) PetscFunctionReturn(0); 26 27 /* workspace */ 28 if (!work) { 29 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 30 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 31 } else { 32 ulw = lw; 33 uwork = work; 34 } 35 n = PetscMin(nr,nc); 36 if (!rwork) { 37 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 38 } else { 39 sing = rwork; 40 } 41 42 /* SVD */ 43 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 44 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 47 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 48 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 49 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 50 ierr = PetscFPTrapPop();CHKERRQ(ierr); 51 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 52 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 53 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 54 if (!rwork) { 55 ierr = PetscFree(sing);CHKERRQ(ierr); 56 } 57 if (!work) { 58 ierr = PetscFree(uwork);CHKERRQ(ierr); 59 } 60 /* create B */ 61 if (!range) { 62 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 63 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 65 } else { 66 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 67 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 68 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 69 } 70 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 71 ierr = PetscFree(U);CHKERRQ(ierr); 72 #endif 73 #else /* PETSC_USE_COMPLEX */ 74 PetscFunctionBegin; 75 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 76 #endif 77 PetscFunctionReturn(0); 78 } 79 80 /* TODO REMOVE */ 81 #if defined(PRINT_GDET) 82 static int inc = 0; 83 static int lev = 0; 84 #endif 85 86 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 87 { 88 PetscErrorCode ierr; 89 Mat GE,GEd; 90 PetscInt rsize,csize,esize; 91 PetscScalar *ptr; 92 93 PetscFunctionBegin; 94 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 95 if (!esize) PetscFunctionReturn(0); 96 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 97 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 98 99 /* gradients */ 100 ptr = work + 5*esize; 101 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 102 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 103 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 104 ierr = MatDestroy(&GE);CHKERRQ(ierr); 105 106 /* constants */ 107 ptr += rsize*csize; 108 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 109 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 110 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 111 ierr = MatDestroy(&GE);CHKERRQ(ierr); 112 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 113 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 114 115 if (corners) { 116 Mat GEc; 117 PetscScalar *vals,v; 118 119 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 120 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 121 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 122 /* v = PetscAbsScalar(vals[0]) */; 123 v = 1.; 124 cvals[0] = vals[0]/v; 125 cvals[1] = vals[1]/v; 126 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 127 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 128 #if defined(PRINT_GDET) 129 { 130 PetscViewer viewer; 131 char filename[256]; 132 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 133 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 134 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 135 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 136 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 137 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 138 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 140 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 141 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 142 } 143 #endif 144 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 145 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 146 } 147 148 PetscFunctionReturn(0); 149 } 150 151 PetscErrorCode PCBDDCNedelecSupport(PC pc) 152 { 153 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 154 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 155 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 156 Vec tvec; 157 PetscSF sfv; 158 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 159 MPI_Comm comm; 160 IS lned,primals,allprimals,nedfieldlocal; 161 IS *eedges,*extrows,*extcols,*alleedges; 162 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 163 PetscScalar *vals,*work; 164 PetscReal *rwork; 165 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 166 PetscInt ne,nv,Lv,order,n,field; 167 PetscInt n_neigh,*neigh,*n_shared,**shared; 168 PetscInt i,j,extmem,cum,maxsize,nee; 169 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 170 PetscInt *sfvleaves,*sfvroots; 171 PetscInt *corners,*cedges; 172 PetscInt *ecount,**eneighs,*vcount,**vneighs; 173 #if defined(PETSC_USE_DEBUG) 174 PetscInt *emarks; 175 #endif 176 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 177 PetscErrorCode ierr; 178 179 PetscFunctionBegin; 180 /* If the discrete gradient is defined for a subset of dofs and global is true, 181 it assumes G is given in global ordering for all the dofs. 182 Otherwise, the ordering is global for the Nedelec field */ 183 order = pcbddc->nedorder; 184 conforming = pcbddc->conforming; 185 field = pcbddc->nedfield; 186 global = pcbddc->nedglobal; 187 setprimal = PETSC_FALSE; 188 print = PETSC_FALSE; 189 singular = PETSC_FALSE; 190 191 /* Command line customization */ 192 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 193 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 194 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 195 ierr = PetscOptionsInt ("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 196 /* print debug info TODO: to be removed */ 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsEnd();CHKERRQ(ierr); 199 200 /* Return if there are no edges in the decomposition and the problem is not singular */ 201 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 202 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 203 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 204 if (!singular) { 205 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 206 lrc[0] = PETSC_FALSE; 207 for (i=0;i<n;i++) { 208 if (PetscRealPart(vals[i]) > 2.) { 209 lrc[0] = PETSC_TRUE; 210 break; 211 } 212 } 213 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 214 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 215 if (!lrc[1]) PetscFunctionReturn(0); 216 } 217 218 /* Get Nedelec field */ 219 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 220 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 221 if (pcbddc->n_ISForDofsLocal && field >= 0) { 222 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 223 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 224 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 225 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 226 ne = n; 227 nedfieldlocal = NULL; 228 global = PETSC_TRUE; 229 } else if (field == PETSC_DECIDE) { 230 PetscInt rst,ren,*idx; 231 232 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 233 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 234 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 235 for (i=rst;i<ren;i++) { 236 PetscInt nc; 237 238 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 239 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 240 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 241 } 242 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 243 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 244 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 245 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 246 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 247 } else { 248 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 249 } 250 251 /* Sanity checks */ 252 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 253 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 254 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 255 256 /* Just set primal dofs and return */ 257 if (setprimal) { 258 IS enedfieldlocal; 259 PetscInt *eidxs; 260 261 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 262 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 263 if (nedfieldlocal) { 264 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 265 for (i=0,cum=0;i<ne;i++) { 266 if (PetscRealPart(vals[idxs[i]]) > 2.) { 267 eidxs[cum++] = idxs[i]; 268 } 269 } 270 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 271 } else { 272 for (i=0,cum=0;i<ne;i++) { 273 if (PetscRealPart(vals[i]) > 2.) { 274 eidxs[cum++] = i; 275 } 276 } 277 } 278 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 279 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 280 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 281 ierr = PetscFree(eidxs);CHKERRQ(ierr); 282 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 283 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 /* Compute some l2g maps */ 288 if (nedfieldlocal) { 289 IS is; 290 291 /* need to map from the local Nedelec field to local numbering */ 292 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 293 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 294 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 295 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 296 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 297 if (global) { 298 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 299 el2g = al2g; 300 } else { 301 IS gis; 302 303 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 304 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 305 ierr = ISDestroy(&gis);CHKERRQ(ierr); 306 } 307 ierr = ISDestroy(&is);CHKERRQ(ierr); 308 } else { 309 /* restore default */ 310 pcbddc->nedfield = -1; 311 /* one ref for the destruction of al2g, one for el2g */ 312 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 313 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 314 el2g = al2g; 315 fl2g = NULL; 316 } 317 318 /* Start communication to drop connections for interior edges (for cc analysis only) */ 319 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 320 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 321 if (nedfieldlocal) { 322 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 323 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 324 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 325 } else { 326 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 327 } 328 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 329 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 330 331 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 332 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 333 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 334 if (global) { 335 PetscInt rst; 336 337 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 338 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 339 if (matis->sf_rootdata[i] < 2) { 340 matis->sf_rootdata[cum++] = i + rst; 341 } 342 } 343 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 344 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 345 } else { 346 PetscInt *tbz; 347 348 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 349 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 350 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 351 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 352 for (i=0,cum=0;i<ne;i++) 353 if (matis->sf_leafdata[idxs[i]] == 1) 354 tbz[cum++] = i; 355 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 356 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 357 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 358 ierr = PetscFree(tbz);CHKERRQ(ierr); 359 } 360 } else { /* we need the entire G to infer the nullspace */ 361 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 362 G = pcbddc->discretegradient; 363 } 364 365 /* Extract subdomain relevant rows of G */ 366 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 367 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 368 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 369 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 370 ierr = ISDestroy(&lned);CHKERRQ(ierr); 371 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 372 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 373 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 374 375 /* SF for nodal dofs communications */ 376 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 377 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 378 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 379 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 380 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 381 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 382 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 384 i = singular ? 2 : 1; 385 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 386 387 /* Destroy temporary G created in MATIS format and modified G */ 388 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 389 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 390 ierr = MatDestroy(&G);CHKERRQ(ierr); 391 392 if (print) { 393 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 394 ierr = MatView(lG,NULL);CHKERRQ(ierr); 395 } 396 397 /* Save lG for values insertion in change of basis */ 398 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 399 400 /* Analyze the edge-nodes connections (duplicate lG) */ 401 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 402 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 403 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 404 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 405 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 406 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 407 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 408 /* need to import the boundary specification to ensure the 409 proper detection of coarse edges' endpoints */ 410 if (pcbddc->DirichletBoundariesLocal) { 411 IS is; 412 413 if (fl2g) { 414 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 415 } else { 416 is = pcbddc->DirichletBoundariesLocal; 417 } 418 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 419 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 420 for (i=0;i<cum;i++) { 421 if (idxs[i] >= 0) { 422 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 423 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 424 } 425 } 426 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 427 if (fl2g) { 428 ierr = ISDestroy(&is);CHKERRQ(ierr); 429 } 430 } 431 if (pcbddc->NeumannBoundariesLocal) { 432 IS is; 433 434 if (fl2g) { 435 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 436 } else { 437 is = pcbddc->NeumannBoundariesLocal; 438 } 439 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 440 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 441 for (i=0;i<cum;i++) { 442 if (idxs[i] >= 0) { 443 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 444 } 445 } 446 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 447 if (fl2g) { 448 ierr = ISDestroy(&is);CHKERRQ(ierr); 449 } 450 } 451 452 /* Count neighs per dof */ 453 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 454 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 455 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 456 for (i=1,cum=0;i<n_neigh;i++) { 457 cum += n_shared[i]; 458 for (j=0;j<n_shared[i];j++) { 459 ecount[shared[i][j]]++; 460 } 461 } 462 if (ne) { 463 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 464 } 465 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 466 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 467 for (i=1;i<n_neigh;i++) { 468 for (j=0;j<n_shared[i];j++) { 469 PetscInt k = shared[i][j]; 470 eneighs[k][ecount[k]] = neigh[i]; 471 ecount[k]++; 472 } 473 } 474 for (i=0;i<ne;i++) { 475 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 476 } 477 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 478 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 479 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 480 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 481 for (i=1,cum=0;i<n_neigh;i++) { 482 cum += n_shared[i]; 483 for (j=0;j<n_shared[i];j++) { 484 vcount[shared[i][j]]++; 485 } 486 } 487 if (nv) { 488 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 489 } 490 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 491 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 492 for (i=1;i<n_neigh;i++) { 493 for (j=0;j<n_shared[i];j++) { 494 PetscInt k = shared[i][j]; 495 vneighs[k][vcount[k]] = neigh[i]; 496 vcount[k]++; 497 } 498 } 499 for (i=0;i<nv;i++) { 500 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 501 } 502 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 503 504 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 505 for proper detection of coarse edges' endpoints */ 506 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 507 for (i=0;i<ne;i++) { 508 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 509 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 510 } 511 } 512 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 513 if (!conforming) { 514 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 515 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 516 } 517 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 518 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 519 cum = 0; 520 for (i=0;i<ne;i++) { 521 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 522 if (!PetscBTLookup(btee,i)) { 523 marks[cum++] = i; 524 continue; 525 } 526 /* set badly connected edge dofs as primal */ 527 if (!conforming) { 528 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 529 marks[cum++] = i; 530 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 531 for (j=ii[i];j<ii[i+1];j++) { 532 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 533 } 534 } else { 535 /* every edge dofs should be connected trough a certain number of nodal dofs 536 to other edge dofs belonging to coarse edges 537 - at most 2 endpoints 538 - order-1 interior nodal dofs 539 - no undefined nodal dofs (nconn < order) 540 */ 541 PetscInt ends = 0,ints = 0, undef = 0; 542 for (j=ii[i];j<ii[i+1];j++) { 543 PetscInt v = jj[j],k; 544 PetscInt nconn = iit[v+1]-iit[v]; 545 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 546 if (nconn > order) ends++; 547 else if (nconn == order) ints++; 548 else undef++; 549 } 550 if (undef || ends > 2 || ints != order -1) { 551 marks[cum++] = i; 552 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 553 for (j=ii[i];j<ii[i+1];j++) { 554 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 555 } 556 } 557 } 558 } 559 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 560 if (!order && ii[i+1] != ii[i]) { 561 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 562 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 563 } 564 } 565 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 566 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 567 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 568 if (!conforming) { 569 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 570 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 571 } 572 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 573 574 /* identify splitpoints and corner candidates */ 575 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 576 if (print) { 577 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 578 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 579 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 580 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 581 } 582 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 583 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 584 for (i=0;i<nv;i++) { 585 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 586 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 587 if (!order) { /* variable order */ 588 PetscReal vorder = 0.; 589 590 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 591 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 592 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 593 ord = 1; 594 } 595 #if defined(PETSC_USE_DEBUG) 596 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 597 #endif 598 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 599 if (PetscBTLookup(btbd,jj[j])) { 600 bdir = PETSC_TRUE; 601 break; 602 } 603 if (vc != ecount[jj[j]]) { 604 sneighs = PETSC_FALSE; 605 } else { 606 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 607 for (k=0;k<vc;k++) { 608 if (vn[k] != en[k]) { 609 sneighs = PETSC_FALSE; 610 break; 611 } 612 } 613 } 614 } 615 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 616 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 617 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 618 } else if (test == ord) { 619 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 620 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 621 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 622 } else { 623 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 624 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 625 } 626 } 627 } 628 ierr = PetscFree(ecount);CHKERRQ(ierr); 629 ierr = PetscFree(vcount);CHKERRQ(ierr); 630 if (ne) { 631 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 632 } 633 if (nv) { 634 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 635 } 636 ierr = PetscFree(eneighs);CHKERRQ(ierr); 637 ierr = PetscFree(vneighs);CHKERRQ(ierr); 638 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 639 640 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 641 if (order != 1) { 642 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 643 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 644 for (i=0;i<nv;i++) { 645 if (PetscBTLookup(btvcand,i)) { 646 PetscBool found = PETSC_FALSE; 647 for (j=ii[i];j<ii[i+1] && !found;j++) { 648 PetscInt k,e = jj[j]; 649 if (PetscBTLookup(bte,e)) continue; 650 for (k=iit[e];k<iit[e+1];k++) { 651 PetscInt v = jjt[k]; 652 if (v != i && PetscBTLookup(btvcand,v)) { 653 found = PETSC_TRUE; 654 break; 655 } 656 } 657 } 658 if (!found) { 659 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 660 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 661 } else { 662 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 663 } 664 } 665 } 666 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 667 } 668 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 669 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 670 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 671 672 /* Get the local G^T explicitly */ 673 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 674 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 675 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 676 677 /* Mark interior nodal dofs */ 678 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 679 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 680 for (i=1;i<n_neigh;i++) { 681 for (j=0;j<n_shared[i];j++) { 682 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 683 } 684 } 685 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 686 687 /* communicate corners and splitpoints */ 688 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 689 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 690 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 691 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 692 693 if (print) { 694 IS tbz; 695 696 cum = 0; 697 for (i=0;i<nv;i++) 698 if (sfvleaves[i]) 699 vmarks[cum++] = i; 700 701 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 702 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 703 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 704 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 705 } 706 707 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 708 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 709 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 710 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 711 712 /* Zero rows of lGt corresponding to identified corners 713 and interior nodal dofs */ 714 cum = 0; 715 for (i=0;i<nv;i++) { 716 if (sfvleaves[i]) { 717 vmarks[cum++] = i; 718 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 719 } 720 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 721 } 722 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 723 if (print) { 724 IS tbz; 725 726 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 727 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 728 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 729 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 730 } 731 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 732 ierr = PetscFree(vmarks);CHKERRQ(ierr); 733 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 734 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 735 736 /* Recompute G */ 737 ierr = MatDestroy(&lG);CHKERRQ(ierr); 738 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 739 if (print) { 740 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 741 ierr = MatView(lG,NULL);CHKERRQ(ierr); 742 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 743 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 744 } 745 746 /* Get primal dofs (if any) */ 747 cum = 0; 748 for (i=0;i<ne;i++) { 749 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 750 } 751 if (fl2g) { 752 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 753 } 754 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 755 if (print) { 756 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 757 ierr = ISView(primals,NULL);CHKERRQ(ierr); 758 } 759 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 760 /* TODO: what if the user passed in some of them ? */ 761 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 762 ierr = ISDestroy(&primals);CHKERRQ(ierr); 763 764 /* Compute edge connectivity */ 765 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 766 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 767 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 768 if (fl2g) { 769 PetscBT btf; 770 PetscInt *iia,*jja,*iiu,*jju; 771 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 772 773 /* create CSR for all local dofs */ 774 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 775 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 776 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 777 iiu = pcbddc->mat_graph->xadj; 778 jju = pcbddc->mat_graph->adjncy; 779 } else if (pcbddc->use_local_adj) { 780 rest = PETSC_TRUE; 781 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 782 } else { 783 free = PETSC_TRUE; 784 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 785 iiu[0] = 0; 786 for (i=0;i<n;i++) { 787 iiu[i+1] = i+1; 788 jju[i] = -1; 789 } 790 } 791 792 /* import sizes of CSR */ 793 iia[0] = 0; 794 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 795 796 /* overwrite entries corresponding to the Nedelec field */ 797 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 798 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 799 for (i=0;i<ne;i++) { 800 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 801 iia[idxs[i]+1] = ii[i+1]-ii[i]; 802 } 803 804 /* iia in CSR */ 805 for (i=0;i<n;i++) iia[i+1] += iia[i]; 806 807 /* jja in CSR */ 808 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 809 for (i=0;i<n;i++) 810 if (!PetscBTLookup(btf,i)) 811 for (j=0;j<iiu[i+1]-iiu[i];j++) 812 jja[iia[i]+j] = jju[iiu[i]+j]; 813 814 /* map edge dofs connectivity */ 815 if (jj) { 816 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 817 for (i=0;i<ne;i++) { 818 PetscInt e = idxs[i]; 819 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 820 } 821 } 822 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 823 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 824 if (rest) { 825 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 826 } 827 if (free) { 828 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 829 } 830 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 831 } else { 832 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 833 } 834 835 /* Analyze interface for edge dofs */ 836 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 837 pcbddc->mat_graph->twodim = PETSC_FALSE; 838 839 /* Get coarse edges in the edge space */ 840 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 841 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 842 843 if (fl2g) { 844 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 845 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 846 for (i=0;i<nee;i++) { 847 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 848 } 849 } else { 850 eedges = alleedges; 851 primals = allprimals; 852 } 853 854 /* Mark fine edge dofs with their coarse edge id */ 855 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 856 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 857 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 858 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 859 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 860 if (print) { 861 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 862 ierr = ISView(primals,NULL);CHKERRQ(ierr); 863 } 864 865 maxsize = 0; 866 for (i=0;i<nee;i++) { 867 PetscInt size,mark = i+1; 868 869 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 870 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 871 for (j=0;j<size;j++) marks[idxs[j]] = mark; 872 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 873 maxsize = PetscMax(maxsize,size); 874 } 875 876 /* Find coarse edge endpoints */ 877 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 878 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 879 for (i=0;i<nee;i++) { 880 PetscInt mark = i+1,size; 881 882 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 883 if (!size && nedfieldlocal) continue; 884 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 885 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 886 if (print) { 887 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 888 ISView(eedges[i],NULL); 889 } 890 for (j=0;j<size;j++) { 891 PetscInt k, ee = idxs[j]; 892 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 893 for (k=ii[ee];k<ii[ee+1];k++) { 894 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 895 if (PetscBTLookup(btv,jj[k])) { 896 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 897 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 898 PetscInt k2; 899 PetscBool corner = PETSC_FALSE; 900 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 901 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 902 /* it's a corner if either is connected with an edge dof belonging to a different cc or 903 if the edge dof lie on the natural part of the boundary */ 904 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 905 corner = PETSC_TRUE; 906 break; 907 } 908 } 909 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 910 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 911 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 912 } else { 913 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 914 } 915 } 916 } 917 } 918 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 919 } 920 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 921 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 922 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 923 924 /* Reset marked primal dofs */ 925 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 926 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 927 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 928 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 929 930 /* Now use the initial lG */ 931 ierr = MatDestroy(&lG);CHKERRQ(ierr); 932 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 933 lG = lGinit; 934 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 935 936 /* Compute extended cols indices */ 937 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 938 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 939 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 940 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 941 i *= maxsize; 942 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 943 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 944 eerr = PETSC_FALSE; 945 for (i=0;i<nee;i++) { 946 PetscInt size,found = 0; 947 948 cum = 0; 949 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 950 if (!size && nedfieldlocal) continue; 951 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 952 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 953 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 954 for (j=0;j<size;j++) { 955 PetscInt k,ee = idxs[j]; 956 for (k=ii[ee];k<ii[ee+1];k++) { 957 PetscInt vv = jj[k]; 958 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 959 else if (!PetscBTLookupSet(btvc,vv)) found++; 960 } 961 } 962 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 963 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 964 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 965 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 966 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 967 /* it may happen that endpoints are not defined at this point 968 if it is the case, mark this edge for a second pass */ 969 if (cum != size -1 || found != 2) { 970 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 971 if (print) { 972 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 973 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 974 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 975 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 976 } 977 eerr = PETSC_TRUE; 978 } 979 } 980 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 981 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 982 if (done) { 983 PetscInt *newprimals; 984 985 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 986 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 987 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 988 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 989 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 990 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 991 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 992 for (i=0;i<nee;i++) { 993 PetscBool has_candidates = PETSC_FALSE; 994 if (PetscBTLookup(bter,i)) { 995 PetscInt size,mark = i+1; 996 997 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 998 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 999 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1000 for (j=0;j<size;j++) { 1001 PetscInt k,ee = idxs[j]; 1002 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1003 for (k=ii[ee];k<ii[ee+1];k++) { 1004 /* set all candidates located on the edge as corners */ 1005 if (PetscBTLookup(btvcand,jj[k])) { 1006 PetscInt k2,vv = jj[k]; 1007 has_candidates = PETSC_TRUE; 1008 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1009 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1010 /* set all edge dofs connected to candidate as primals */ 1011 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1012 if (marks[jjt[k2]] == mark) { 1013 PetscInt k3,ee2 = jjt[k2]; 1014 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1015 newprimals[cum++] = ee2; 1016 /* finally set the new corners */ 1017 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1018 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1019 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1020 } 1021 } 1022 } 1023 } else { 1024 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1025 } 1026 } 1027 } 1028 if (!has_candidates) { /* circular edge */ 1029 PetscInt k, ee = idxs[0],*tmarks; 1030 1031 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1032 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1033 for (k=ii[ee];k<ii[ee+1];k++) { 1034 PetscInt k2; 1035 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1036 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1037 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1038 } 1039 for (j=0;j<size;j++) { 1040 if (tmarks[idxs[j]] > 1) { 1041 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1042 newprimals[cum++] = idxs[j]; 1043 } 1044 } 1045 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1046 } 1047 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1048 } 1049 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1050 } 1051 ierr = PetscFree(extcols);CHKERRQ(ierr); 1052 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1053 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1054 if (fl2g) { 1055 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1056 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1057 for (i=0;i<nee;i++) { 1058 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1059 } 1060 ierr = PetscFree(eedges);CHKERRQ(ierr); 1061 } 1062 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1063 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1064 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1065 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1066 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1067 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1068 pcbddc->mat_graph->twodim = PETSC_FALSE; 1069 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1070 if (fl2g) { 1071 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1072 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1073 for (i=0;i<nee;i++) { 1074 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1075 } 1076 } else { 1077 eedges = alleedges; 1078 primals = allprimals; 1079 } 1080 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1081 1082 /* Mark again */ 1083 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1084 for (i=0;i<nee;i++) { 1085 PetscInt size,mark = i+1; 1086 1087 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1088 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1089 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1090 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1091 } 1092 if (print) { 1093 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1094 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1095 } 1096 1097 /* Recompute extended cols */ 1098 eerr = PETSC_FALSE; 1099 for (i=0;i<nee;i++) { 1100 PetscInt size; 1101 1102 cum = 0; 1103 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1104 if (!size && nedfieldlocal) continue; 1105 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1106 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1107 for (j=0;j<size;j++) { 1108 PetscInt k,ee = idxs[j]; 1109 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1110 } 1111 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1112 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1113 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1114 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1115 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1116 if (cum != size -1) { 1117 if (print) { 1118 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1119 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1120 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1121 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1122 } 1123 eerr = PETSC_TRUE; 1124 } 1125 } 1126 } 1127 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1128 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1129 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1130 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1131 /* an error should not occur at this point */ 1132 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1133 1134 /* Check the number of endpoints */ 1135 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1136 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1137 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1138 for (i=0;i<nee;i++) { 1139 PetscInt size, found = 0, gc[2]; 1140 1141 /* init with defaults */ 1142 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1143 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1144 if (!size && nedfieldlocal) continue; 1145 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1146 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1147 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1148 for (j=0;j<size;j++) { 1149 PetscInt k,ee = idxs[j]; 1150 for (k=ii[ee];k<ii[ee+1];k++) { 1151 PetscInt vv = jj[k]; 1152 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1153 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1154 corners[i*2+found++] = vv; 1155 } 1156 } 1157 } 1158 if (found != 2) { 1159 PetscInt e; 1160 if (fl2g) { 1161 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1162 } else { 1163 e = idxs[0]; 1164 } 1165 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1166 } 1167 1168 /* get primal dof index on this coarse edge */ 1169 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1170 if (gc[0] > gc[1]) { 1171 PetscInt swap = corners[2*i]; 1172 corners[2*i] = corners[2*i+1]; 1173 corners[2*i+1] = swap; 1174 } 1175 cedges[i] = idxs[size-1]; 1176 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1177 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1178 } 1179 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1180 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1181 1182 #if defined(PETSC_USE_DEBUG) 1183 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1184 not interfere with neighbouring coarse edges */ 1185 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1186 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1187 for (i=0;i<nv;i++) { 1188 PetscInt emax = 0,eemax = 0; 1189 1190 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1191 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1192 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1193 for (j=1;j<nee+1;j++) { 1194 if (emax < emarks[j]) { 1195 emax = emarks[j]; 1196 eemax = j; 1197 } 1198 } 1199 /* not relevant for edges */ 1200 if (!eemax) continue; 1201 1202 for (j=ii[i];j<ii[i+1];j++) { 1203 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1204 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1205 } 1206 } 1207 } 1208 ierr = PetscFree(emarks);CHKERRQ(ierr); 1209 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1210 #endif 1211 1212 /* Compute extended rows indices for edge blocks of the change of basis */ 1213 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1214 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1215 extmem *= maxsize; 1216 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1217 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1218 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1219 for (i=0;i<nv;i++) { 1220 PetscInt mark = 0,size,start; 1221 1222 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1223 for (j=ii[i];j<ii[i+1];j++) 1224 if (marks[jj[j]] && !mark) 1225 mark = marks[jj[j]]; 1226 1227 /* not relevant */ 1228 if (!mark) continue; 1229 1230 /* import extended row */ 1231 mark--; 1232 start = mark*extmem+extrowcum[mark]; 1233 size = ii[i+1]-ii[i]; 1234 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1235 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1236 extrowcum[mark] += size; 1237 } 1238 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1239 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1240 ierr = PetscFree(marks);CHKERRQ(ierr); 1241 1242 /* Compress extrows */ 1243 cum = 0; 1244 for (i=0;i<nee;i++) { 1245 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1246 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1247 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1248 cum = PetscMax(cum,size); 1249 } 1250 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1251 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1252 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1253 1254 /* Workspace for lapack inner calls and VecSetValues */ 1255 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1256 1257 /* Create change of basis matrix (preallocation can be improved) */ 1258 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1259 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1260 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1261 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1262 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1263 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1264 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1265 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1266 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1267 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1268 1269 /* Defaults to identity */ 1270 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1271 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1272 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1273 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1274 1275 /* Create discrete gradient for the coarser level if needed */ 1276 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1277 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1278 if (pcbddc->current_level < pcbddc->max_levels) { 1279 ISLocalToGlobalMapping cel2g,cvl2g; 1280 IS wis,gwis; 1281 PetscInt cnv,cne; 1282 1283 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1284 if (fl2g) { 1285 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1286 } else { 1287 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1288 pcbddc->nedclocal = wis; 1289 } 1290 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1291 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1292 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1293 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1294 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1295 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1296 1297 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1298 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1299 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1300 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1301 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1302 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1303 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1304 1305 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1306 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1307 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1308 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1309 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1310 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1311 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1312 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1313 } 1314 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1315 1316 #if defined(PRINT_GDET) 1317 inc = 0; 1318 lev = pcbddc->current_level; 1319 #endif 1320 1321 /* Insert values in the change of basis matrix */ 1322 for (i=0;i<nee;i++) { 1323 Mat Gins = NULL, GKins = NULL; 1324 IS cornersis = NULL; 1325 PetscScalar cvals[2]; 1326 1327 if (pcbddc->nedcG) { 1328 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1329 } 1330 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1331 if (Gins && GKins) { 1332 PetscScalar *data; 1333 const PetscInt *rows,*cols; 1334 PetscInt nrh,nch,nrc,ncc; 1335 1336 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1337 /* H1 */ 1338 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1339 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1340 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1341 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1342 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1343 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1344 /* complement */ 1345 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1346 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1347 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1348 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1349 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1350 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1351 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1352 1353 /* coarse discrete gradient */ 1354 if (pcbddc->nedcG) { 1355 PetscInt cols[2]; 1356 1357 cols[0] = 2*i; 1358 cols[1] = 2*i+1; 1359 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1360 } 1361 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1362 } 1363 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1364 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1365 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1366 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1367 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1368 } 1369 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1370 1371 /* Start assembling */ 1372 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1373 if (pcbddc->nedcG) { 1374 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1375 } 1376 1377 /* Free */ 1378 if (fl2g) { 1379 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1380 for (i=0;i<nee;i++) { 1381 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1382 } 1383 ierr = PetscFree(eedges);CHKERRQ(ierr); 1384 } 1385 1386 /* hack mat_graph with primal dofs on the coarse edges */ 1387 { 1388 PCBDDCGraph graph = pcbddc->mat_graph; 1389 PetscInt *oqueue = graph->queue; 1390 PetscInt *ocptr = graph->cptr; 1391 PetscInt ncc,*idxs; 1392 1393 /* find first primal edge */ 1394 if (pcbddc->nedclocal) { 1395 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1396 } else { 1397 if (fl2g) { 1398 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1399 } 1400 idxs = cedges; 1401 } 1402 cum = 0; 1403 while (cum < nee && cedges[cum] < 0) cum++; 1404 1405 /* adapt connected components */ 1406 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1407 graph->cptr[0] = 0; 1408 for (i=0,ncc=0;i<graph->ncc;i++) { 1409 PetscInt lc = ocptr[i+1]-ocptr[i]; 1410 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1411 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1412 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1413 ncc++; 1414 lc--; 1415 cum++; 1416 while (cum < nee && cedges[cum] < 0) cum++; 1417 } 1418 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1419 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1420 ncc++; 1421 } 1422 graph->ncc = ncc; 1423 if (pcbddc->nedclocal) { 1424 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1425 } 1426 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1427 } 1428 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1429 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1430 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1431 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1432 1433 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1434 ierr = PetscFree(extrow);CHKERRQ(ierr); 1435 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1436 ierr = PetscFree(corners);CHKERRQ(ierr); 1437 ierr = PetscFree(cedges);CHKERRQ(ierr); 1438 ierr = PetscFree(extrows);CHKERRQ(ierr); 1439 ierr = PetscFree(extcols);CHKERRQ(ierr); 1440 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1441 1442 /* Complete assembling */ 1443 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1444 if (pcbddc->nedcG) { 1445 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1446 #if 0 1447 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1448 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1449 #endif 1450 } 1451 1452 /* set change of basis */ 1453 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1454 ierr = MatDestroy(&T);CHKERRQ(ierr); 1455 1456 PetscFunctionReturn(0); 1457 } 1458 1459 /* the near-null space of BDDC carries information on quadrature weights, 1460 and these can be collinear -> so cheat with MatNullSpaceCreate 1461 and create a suitable set of basis vectors first */ 1462 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1463 { 1464 PetscErrorCode ierr; 1465 PetscInt i; 1466 1467 PetscFunctionBegin; 1468 for (i=0;i<nvecs;i++) { 1469 PetscInt first,last; 1470 1471 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1472 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1473 if (i>=first && i < last) { 1474 PetscScalar *data; 1475 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1476 if (!has_const) { 1477 data[i-first] = 1.; 1478 } else { 1479 data[2*i-first] = 1./PetscSqrtReal(2.); 1480 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1481 } 1482 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1483 } 1484 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1485 } 1486 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1487 for (i=0;i<nvecs;i++) { /* reset vectors */ 1488 PetscInt first,last; 1489 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1490 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1491 if (i>=first && i < last) { 1492 PetscScalar *data; 1493 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1494 if (!has_const) { 1495 data[i-first] = 0.; 1496 } else { 1497 data[2*i-first] = 0.; 1498 data[2*i-first+1] = 0.; 1499 } 1500 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1501 } 1502 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1503 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1504 } 1505 PetscFunctionReturn(0); 1506 } 1507 1508 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1509 { 1510 Mat loc_divudotp; 1511 Vec p,v,vins,quad_vec,*quad_vecs; 1512 ISLocalToGlobalMapping map; 1513 IS *faces,*edges; 1514 PetscScalar *vals; 1515 const PetscScalar *array; 1516 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1517 PetscMPIInt rank; 1518 PetscErrorCode ierr; 1519 1520 PetscFunctionBegin; 1521 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1522 if (graph->twodim) { 1523 lmaxneighs = 2; 1524 } else { 1525 lmaxneighs = 1; 1526 for (i=0;i<ne;i++) { 1527 const PetscInt *idxs; 1528 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1529 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1530 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1531 } 1532 lmaxneighs++; /* graph count does not include self */ 1533 } 1534 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1535 maxsize = 0; 1536 for (i=0;i<ne;i++) { 1537 PetscInt nn; 1538 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1539 maxsize = PetscMax(maxsize,nn); 1540 } 1541 for (i=0;i<nf;i++) { 1542 PetscInt nn; 1543 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1544 maxsize = PetscMax(maxsize,nn); 1545 } 1546 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1547 /* create vectors to hold quadrature weights */ 1548 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1549 if (!transpose) { 1550 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1551 } else { 1552 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1553 } 1554 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1555 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1556 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1557 for (i=0;i<maxneighs;i++) { 1558 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1559 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1560 } 1561 1562 /* compute local quad vec */ 1563 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1564 if (!transpose) { 1565 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1566 } else { 1567 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1568 } 1569 ierr = VecSet(p,1.);CHKERRQ(ierr); 1570 if (!transpose) { 1571 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1572 } else { 1573 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1574 } 1575 if (vl2l) { 1576 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1577 } else { 1578 vins = v; 1579 } 1580 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1581 ierr = VecDestroy(&p);CHKERRQ(ierr); 1582 1583 /* insert in global quadrature vecs */ 1584 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1585 for (i=0;i<nf;i++) { 1586 const PetscInt *idxs; 1587 PetscInt idx,nn,j; 1588 1589 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1590 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1591 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1592 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1593 idx = -(idx+1); 1594 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1595 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1596 } 1597 for (i=0;i<ne;i++) { 1598 const PetscInt *idxs; 1599 PetscInt idx,nn,j; 1600 1601 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1602 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1603 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1604 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1605 idx = -(idx+1); 1606 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1607 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1608 } 1609 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1610 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1611 if (vl2l) { 1612 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1613 } 1614 ierr = VecDestroy(&v);CHKERRQ(ierr); 1615 ierr = PetscFree(vals);CHKERRQ(ierr); 1616 1617 /* assemble near null space */ 1618 for (i=0;i<maxneighs;i++) { 1619 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1620 } 1621 for (i=0;i<maxneighs;i++) { 1622 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1623 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1624 } 1625 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1626 PetscFunctionReturn(0); 1627 } 1628 1629 1630 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1631 { 1632 PetscErrorCode ierr; 1633 Vec local,global; 1634 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1635 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1636 1637 PetscFunctionBegin; 1638 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1639 /* need to convert from global to local topology information and remove references to information in global ordering */ 1640 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1641 if (pcbddc->user_provided_isfordofs) { 1642 if (pcbddc->n_ISForDofs) { 1643 PetscInt i; 1644 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1645 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1646 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1647 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1648 } 1649 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1650 pcbddc->n_ISForDofs = 0; 1651 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1652 } 1653 } else { 1654 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1655 PetscInt i, n = matis->A->rmap->n; 1656 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1657 if (i > 1) { 1658 pcbddc->n_ISForDofsLocal = i; 1659 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1660 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1661 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1662 } 1663 } 1664 } else { 1665 PetscInt i; 1666 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1667 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1668 } 1669 } 1670 } 1671 1672 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1673 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1674 } else if (pcbddc->DirichletBoundariesLocal) { 1675 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1676 } 1677 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1678 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1679 } else if (pcbddc->NeumannBoundariesLocal) { 1680 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1681 } 1682 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1683 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1684 } 1685 ierr = VecDestroy(&global);CHKERRQ(ierr); 1686 ierr = VecDestroy(&local);CHKERRQ(ierr); 1687 1688 PetscFunctionReturn(0); 1689 } 1690 1691 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1692 { 1693 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1694 PetscErrorCode ierr; 1695 IS nis; 1696 const PetscInt *idxs; 1697 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1698 PetscBool *ld; 1699 1700 PetscFunctionBegin; 1701 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1702 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1703 if (mop == MPI_LAND) { 1704 /* init rootdata with true */ 1705 ld = (PetscBool*) matis->sf_rootdata; 1706 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1707 } else { 1708 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1709 } 1710 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1711 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1712 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1713 ld = (PetscBool*) matis->sf_leafdata; 1714 for (i=0;i<nd;i++) 1715 if (-1 < idxs[i] && idxs[i] < n) 1716 ld[idxs[i]] = PETSC_TRUE; 1717 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1718 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1719 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1720 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1721 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1722 if (mop == MPI_LAND) { 1723 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1724 } else { 1725 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1726 } 1727 for (i=0,nnd=0;i<n;i++) 1728 if (ld[i]) 1729 nidxs[nnd++] = i; 1730 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1731 ierr = ISDestroy(is);CHKERRQ(ierr); 1732 *is = nis; 1733 PetscFunctionReturn(0); 1734 } 1735 1736 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1737 { 1738 PC_IS *pcis = (PC_IS*)(pc->data); 1739 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1740 PetscErrorCode ierr; 1741 1742 PetscFunctionBegin; 1743 if (!pcbddc->benign_have_null) { 1744 PetscFunctionReturn(0); 1745 } 1746 if (pcbddc->ChangeOfBasisMatrix) { 1747 Vec swap; 1748 1749 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1750 swap = pcbddc->work_change; 1751 pcbddc->work_change = r; 1752 r = swap; 1753 } 1754 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1755 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1756 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1757 ierr = VecSet(z,0.);CHKERRQ(ierr); 1758 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1759 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1760 if (pcbddc->ChangeOfBasisMatrix) { 1761 pcbddc->work_change = r; 1762 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1763 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1764 } 1765 PetscFunctionReturn(0); 1766 } 1767 1768 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1769 { 1770 PCBDDCBenignMatMult_ctx ctx; 1771 PetscErrorCode ierr; 1772 PetscBool apply_right,apply_left,reset_x; 1773 1774 PetscFunctionBegin; 1775 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1776 if (transpose) { 1777 apply_right = ctx->apply_left; 1778 apply_left = ctx->apply_right; 1779 } else { 1780 apply_right = ctx->apply_right; 1781 apply_left = ctx->apply_left; 1782 } 1783 reset_x = PETSC_FALSE; 1784 if (apply_right) { 1785 const PetscScalar *ax; 1786 PetscInt nl,i; 1787 1788 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1789 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1790 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1791 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1792 for (i=0;i<ctx->benign_n;i++) { 1793 PetscScalar sum,val; 1794 const PetscInt *idxs; 1795 PetscInt nz,j; 1796 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1797 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1798 sum = 0.; 1799 if (ctx->apply_p0) { 1800 val = ctx->work[idxs[nz-1]]; 1801 for (j=0;j<nz-1;j++) { 1802 sum += ctx->work[idxs[j]]; 1803 ctx->work[idxs[j]] += val; 1804 } 1805 } else { 1806 for (j=0;j<nz-1;j++) { 1807 sum += ctx->work[idxs[j]]; 1808 } 1809 } 1810 ctx->work[idxs[nz-1]] -= sum; 1811 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1812 } 1813 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1814 reset_x = PETSC_TRUE; 1815 } 1816 if (transpose) { 1817 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1818 } else { 1819 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1820 } 1821 if (reset_x) { 1822 ierr = VecResetArray(x);CHKERRQ(ierr); 1823 } 1824 if (apply_left) { 1825 PetscScalar *ay; 1826 PetscInt i; 1827 1828 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1829 for (i=0;i<ctx->benign_n;i++) { 1830 PetscScalar sum,val; 1831 const PetscInt *idxs; 1832 PetscInt nz,j; 1833 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1834 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1835 val = -ay[idxs[nz-1]]; 1836 if (ctx->apply_p0) { 1837 sum = 0.; 1838 for (j=0;j<nz-1;j++) { 1839 sum += ay[idxs[j]]; 1840 ay[idxs[j]] += val; 1841 } 1842 ay[idxs[nz-1]] += sum; 1843 } else { 1844 for (j=0;j<nz-1;j++) { 1845 ay[idxs[j]] += val; 1846 } 1847 ay[idxs[nz-1]] = 0.; 1848 } 1849 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1850 } 1851 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1852 } 1853 PetscFunctionReturn(0); 1854 } 1855 1856 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1857 { 1858 PetscErrorCode ierr; 1859 1860 PetscFunctionBegin; 1861 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1862 PetscFunctionReturn(0); 1863 } 1864 1865 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1866 { 1867 PetscErrorCode ierr; 1868 1869 PetscFunctionBegin; 1870 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1871 PetscFunctionReturn(0); 1872 } 1873 1874 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1875 { 1876 PC_IS *pcis = (PC_IS*)pc->data; 1877 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1878 PCBDDCBenignMatMult_ctx ctx; 1879 PetscErrorCode ierr; 1880 1881 PetscFunctionBegin; 1882 if (!restore) { 1883 Mat A_IB,A_BI; 1884 PetscScalar *work; 1885 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1886 1887 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1888 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1889 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1890 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1891 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1892 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1893 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1894 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1895 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1896 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1897 ctx->apply_left = PETSC_TRUE; 1898 ctx->apply_right = PETSC_FALSE; 1899 ctx->apply_p0 = PETSC_FALSE; 1900 ctx->benign_n = pcbddc->benign_n; 1901 if (reuse) { 1902 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1903 ctx->free = PETSC_FALSE; 1904 } else { /* TODO: could be optimized for successive solves */ 1905 ISLocalToGlobalMapping N_to_D; 1906 PetscInt i; 1907 1908 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1909 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1910 for (i=0;i<pcbddc->benign_n;i++) { 1911 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1912 } 1913 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1914 ctx->free = PETSC_TRUE; 1915 } 1916 ctx->A = pcis->A_IB; 1917 ctx->work = work; 1918 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1919 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1920 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1921 pcis->A_IB = A_IB; 1922 1923 /* A_BI as A_IB^T */ 1924 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1925 pcbddc->benign_original_mat = pcis->A_BI; 1926 pcis->A_BI = A_BI; 1927 } else { 1928 if (!pcbddc->benign_original_mat) { 1929 PetscFunctionReturn(0); 1930 } 1931 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1932 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1933 pcis->A_IB = ctx->A; 1934 ctx->A = NULL; 1935 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1936 pcis->A_BI = pcbddc->benign_original_mat; 1937 pcbddc->benign_original_mat = NULL; 1938 if (ctx->free) { 1939 PetscInt i; 1940 for (i=0;i<ctx->benign_n;i++) { 1941 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1942 } 1943 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1944 } 1945 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1946 ierr = PetscFree(ctx);CHKERRQ(ierr); 1947 } 1948 PetscFunctionReturn(0); 1949 } 1950 1951 /* used just in bddc debug mode */ 1952 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1953 { 1954 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1955 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1956 Mat An; 1957 PetscErrorCode ierr; 1958 1959 PetscFunctionBegin; 1960 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1961 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1962 if (is1) { 1963 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1964 ierr = MatDestroy(&An);CHKERRQ(ierr); 1965 } else { 1966 *B = An; 1967 } 1968 PetscFunctionReturn(0); 1969 } 1970 1971 /* TODO: add reuse flag */ 1972 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1973 { 1974 Mat Bt; 1975 PetscScalar *a,*bdata; 1976 const PetscInt *ii,*ij; 1977 PetscInt m,n,i,nnz,*bii,*bij; 1978 PetscBool flg_row; 1979 PetscErrorCode ierr; 1980 1981 PetscFunctionBegin; 1982 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1983 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1984 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1985 nnz = n; 1986 for (i=0;i<ii[n];i++) { 1987 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1988 } 1989 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1990 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1991 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1992 nnz = 0; 1993 bii[0] = 0; 1994 for (i=0;i<n;i++) { 1995 PetscInt j; 1996 for (j=ii[i];j<ii[i+1];j++) { 1997 PetscScalar entry = a[j]; 1998 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1999 bij[nnz] = ij[j]; 2000 bdata[nnz] = entry; 2001 nnz++; 2002 } 2003 } 2004 bii[i+1] = nnz; 2005 } 2006 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2007 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2008 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2009 { 2010 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2011 b->free_a = PETSC_TRUE; 2012 b->free_ij = PETSC_TRUE; 2013 } 2014 *B = Bt; 2015 PetscFunctionReturn(0); 2016 } 2017 2018 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 2019 { 2020 Mat B; 2021 IS is_dummy,*cc_n; 2022 ISLocalToGlobalMapping l2gmap_dummy; 2023 PCBDDCGraph graph; 2024 PetscInt i,n; 2025 PetscInt *xadj,*adjncy; 2026 PetscInt *xadj_filtered,*adjncy_filtered; 2027 PetscBool flg_row,isseqaij; 2028 PetscErrorCode ierr; 2029 2030 PetscFunctionBegin; 2031 if (!A->rmap->N || !A->cmap->N) { 2032 *ncc = 0; 2033 *cc = NULL; 2034 PetscFunctionReturn(0); 2035 } 2036 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2037 if (!isseqaij && filter) { 2038 PetscBool isseqdense; 2039 2040 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2041 if (!isseqdense) { 2042 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2043 } else { /* TODO: rectangular case and LDA */ 2044 PetscScalar *array; 2045 PetscReal chop=1.e-6; 2046 2047 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2048 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2049 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2050 for (i=0;i<n;i++) { 2051 PetscInt j; 2052 for (j=i+1;j<n;j++) { 2053 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2054 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2055 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2056 } 2057 } 2058 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2059 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2060 } 2061 } else { 2062 B = A; 2063 } 2064 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2065 2066 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2067 if (filter) { 2068 PetscScalar *data; 2069 PetscInt j,cum; 2070 2071 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2072 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2073 cum = 0; 2074 for (i=0;i<n;i++) { 2075 PetscInt t; 2076 2077 for (j=xadj[i];j<xadj[i+1];j++) { 2078 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2079 continue; 2080 } 2081 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2082 } 2083 t = xadj_filtered[i]; 2084 xadj_filtered[i] = cum; 2085 cum += t; 2086 } 2087 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2088 } else { 2089 xadj_filtered = NULL; 2090 adjncy_filtered = NULL; 2091 } 2092 2093 /* compute local connected components using PCBDDCGraph */ 2094 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2095 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2096 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2097 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2098 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2099 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2100 if (xadj_filtered) { 2101 graph->xadj = xadj_filtered; 2102 graph->adjncy = adjncy_filtered; 2103 } else { 2104 graph->xadj = xadj; 2105 graph->adjncy = adjncy; 2106 } 2107 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2108 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2109 /* partial clean up */ 2110 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2111 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2112 if (A != B) { 2113 ierr = MatDestroy(&B);CHKERRQ(ierr); 2114 } 2115 2116 /* get back data */ 2117 if (ncc) *ncc = graph->ncc; 2118 if (cc) { 2119 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2120 for (i=0;i<graph->ncc;i++) { 2121 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2122 } 2123 *cc = cc_n; 2124 } 2125 /* clean up graph */ 2126 graph->xadj = 0; 2127 graph->adjncy = 0; 2128 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2129 PetscFunctionReturn(0); 2130 } 2131 2132 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2133 { 2134 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2135 PC_IS* pcis = (PC_IS*)(pc->data); 2136 IS dirIS = NULL; 2137 PetscInt i; 2138 PetscErrorCode ierr; 2139 2140 PetscFunctionBegin; 2141 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2142 if (zerodiag) { 2143 Mat A; 2144 Vec vec3_N; 2145 PetscScalar *vals; 2146 const PetscInt *idxs; 2147 PetscInt nz,*count; 2148 2149 /* p0 */ 2150 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2151 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2152 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2153 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2154 for (i=0;i<nz;i++) vals[i] = 1.; 2155 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2156 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2157 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2158 /* v_I */ 2159 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2160 for (i=0;i<nz;i++) vals[i] = 0.; 2161 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2162 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2163 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2164 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2165 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2166 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2167 if (dirIS) { 2168 PetscInt n; 2169 2170 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2171 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2172 for (i=0;i<n;i++) vals[i] = 0.; 2173 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2174 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2175 } 2176 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2177 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2178 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2179 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2180 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2181 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2182 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2183 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2184 ierr = PetscFree(vals);CHKERRQ(ierr); 2185 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2186 2187 /* there should not be any pressure dofs lying on the interface */ 2188 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2189 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2190 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2191 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2192 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2193 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2194 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2195 ierr = PetscFree(count);CHKERRQ(ierr); 2196 } 2197 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2198 2199 /* check PCBDDCBenignGetOrSetP0 */ 2200 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2201 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2202 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2203 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2204 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2205 for (i=0;i<pcbddc->benign_n;i++) { 2206 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2207 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 2208 } 2209 PetscFunctionReturn(0); 2210 } 2211 2212 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2213 { 2214 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2215 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2216 PetscInt nz,n; 2217 PetscInt *interior_dofs,n_interior_dofs,nneu; 2218 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2223 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2224 for (n=0;n<pcbddc->benign_n;n++) { 2225 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2226 } 2227 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2228 pcbddc->benign_n = 0; 2229 2230 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2231 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2232 Checks if all the pressure dofs in each subdomain have a zero diagonal 2233 If not, a change of basis on pressures is not needed 2234 since the local Schur complements are already SPD 2235 */ 2236 has_null_pressures = PETSC_TRUE; 2237 have_null = PETSC_TRUE; 2238 if (pcbddc->n_ISForDofsLocal) { 2239 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2240 2241 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2242 ierr = PetscOptionsInt ("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2243 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2244 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2245 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2246 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2247 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2248 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2249 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2250 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2251 if (!sorted) { 2252 ierr = ISSort(pressures);CHKERRQ(ierr); 2253 } 2254 } else { 2255 pressures = NULL; 2256 } 2257 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2258 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2259 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2260 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2261 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2262 if (!sorted) { 2263 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2264 } 2265 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2266 zerodiag_save = zerodiag; 2267 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2268 if (!nz) { 2269 if (n) have_null = PETSC_FALSE; 2270 has_null_pressures = PETSC_FALSE; 2271 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2272 } 2273 recompute_zerodiag = PETSC_FALSE; 2274 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2275 zerodiag_subs = NULL; 2276 pcbddc->benign_n = 0; 2277 n_interior_dofs = 0; 2278 interior_dofs = NULL; 2279 nneu = 0; 2280 if (pcbddc->NeumannBoundariesLocal) { 2281 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2282 } 2283 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2284 if (checkb) { /* need to compute interior nodes */ 2285 PetscInt n,i,j; 2286 PetscInt n_neigh,*neigh,*n_shared,**shared; 2287 PetscInt *iwork; 2288 2289 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2290 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2291 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2292 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2293 for (i=1;i<n_neigh;i++) 2294 for (j=0;j<n_shared[i];j++) 2295 iwork[shared[i][j]] += 1; 2296 for (i=0;i<n;i++) 2297 if (!iwork[i]) 2298 interior_dofs[n_interior_dofs++] = i; 2299 ierr = PetscFree(iwork);CHKERRQ(ierr); 2300 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2301 } 2302 if (has_null_pressures) { 2303 IS *subs; 2304 PetscInt nsubs,i,j,nl; 2305 const PetscInt *idxs; 2306 PetscScalar *array; 2307 Vec *work; 2308 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2309 2310 subs = pcbddc->local_subs; 2311 nsubs = pcbddc->n_local_subs; 2312 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2313 if (checkb) { 2314 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2315 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2316 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2317 /* work[0] = 1_p */ 2318 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2319 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2320 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2321 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2322 /* work[0] = 1_v */ 2323 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2324 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2325 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2326 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2327 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2328 } 2329 if (nsubs > 1) { 2330 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2331 for (i=0;i<nsubs;i++) { 2332 ISLocalToGlobalMapping l2g; 2333 IS t_zerodiag_subs; 2334 PetscInt nl; 2335 2336 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2337 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2338 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2339 if (nl) { 2340 PetscBool valid = PETSC_TRUE; 2341 2342 if (checkb) { 2343 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2344 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2345 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2346 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2347 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2348 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2349 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2350 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2351 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2352 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2353 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2354 for (j=0;j<n_interior_dofs;j++) { 2355 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2356 valid = PETSC_FALSE; 2357 break; 2358 } 2359 } 2360 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2361 } 2362 if (valid && nneu) { 2363 const PetscInt *idxs; 2364 PetscInt nzb; 2365 2366 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2367 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2368 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2369 if (nzb) valid = PETSC_FALSE; 2370 } 2371 if (valid && pressures) { 2372 IS t_pressure_subs; 2373 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2374 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2375 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2376 } 2377 if (valid) { 2378 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2379 pcbddc->benign_n++; 2380 } else { 2381 recompute_zerodiag = PETSC_TRUE; 2382 } 2383 } 2384 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2385 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2386 } 2387 } else { /* there's just one subdomain (or zero if they have not been detected */ 2388 PetscBool valid = PETSC_TRUE; 2389 2390 if (nneu) valid = PETSC_FALSE; 2391 if (valid && pressures) { 2392 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2393 } 2394 if (valid && checkb) { 2395 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2396 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2397 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2398 for (j=0;j<n_interior_dofs;j++) { 2399 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2400 valid = PETSC_FALSE; 2401 break; 2402 } 2403 } 2404 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2405 } 2406 if (valid) { 2407 pcbddc->benign_n = 1; 2408 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2409 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2410 zerodiag_subs[0] = zerodiag; 2411 } 2412 } 2413 if (checkb) { 2414 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2415 } 2416 } 2417 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2418 2419 if (!pcbddc->benign_n) { 2420 PetscInt n; 2421 2422 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2423 recompute_zerodiag = PETSC_FALSE; 2424 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2425 if (n) { 2426 has_null_pressures = PETSC_FALSE; 2427 have_null = PETSC_FALSE; 2428 } 2429 } 2430 2431 /* final check for null pressures */ 2432 if (zerodiag && pressures) { 2433 PetscInt nz,np; 2434 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2435 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2436 if (nz != np) have_null = PETSC_FALSE; 2437 } 2438 2439 if (recompute_zerodiag) { 2440 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2441 if (pcbddc->benign_n == 1) { 2442 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2443 zerodiag = zerodiag_subs[0]; 2444 } else { 2445 PetscInt i,nzn,*new_idxs; 2446 2447 nzn = 0; 2448 for (i=0;i<pcbddc->benign_n;i++) { 2449 PetscInt ns; 2450 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2451 nzn += ns; 2452 } 2453 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2454 nzn = 0; 2455 for (i=0;i<pcbddc->benign_n;i++) { 2456 PetscInt ns,*idxs; 2457 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2458 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2459 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2460 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2461 nzn += ns; 2462 } 2463 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2464 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2465 } 2466 have_null = PETSC_FALSE; 2467 } 2468 2469 /* Prepare matrix to compute no-net-flux */ 2470 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2471 Mat A,loc_divudotp; 2472 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2473 IS row,col,isused = NULL; 2474 PetscInt M,N,n,st,n_isused; 2475 2476 if (pressures) { 2477 isused = pressures; 2478 } else { 2479 isused = zerodiag_save; 2480 } 2481 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2482 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2483 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2484 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2485 n_isused = 0; 2486 if (isused) { 2487 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2488 } 2489 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2490 st = st-n_isused; 2491 if (n) { 2492 const PetscInt *gidxs; 2493 2494 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2495 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2496 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2497 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2498 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2499 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2500 } else { 2501 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2502 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2503 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2504 } 2505 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2506 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2507 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2508 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2509 ierr = ISDestroy(&row);CHKERRQ(ierr); 2510 ierr = ISDestroy(&col);CHKERRQ(ierr); 2511 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2512 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2513 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2514 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2515 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2516 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2517 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2518 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2519 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2520 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2521 } 2522 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2523 2524 /* change of basis and p0 dofs */ 2525 if (has_null_pressures) { 2526 IS zerodiagc; 2527 const PetscInt *idxs,*idxsc; 2528 PetscInt i,s,*nnz; 2529 2530 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2531 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2532 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2533 /* local change of basis for pressures */ 2534 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2535 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2536 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2537 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2538 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2539 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2540 for (i=0;i<pcbddc->benign_n;i++) { 2541 PetscInt nzs,j; 2542 2543 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2544 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2545 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2546 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2547 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2548 } 2549 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2550 ierr = PetscFree(nnz);CHKERRQ(ierr); 2551 /* set identity on velocities */ 2552 for (i=0;i<n-nz;i++) { 2553 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2554 } 2555 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2556 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2557 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2558 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2559 /* set change on pressures */ 2560 for (s=0;s<pcbddc->benign_n;s++) { 2561 PetscScalar *array; 2562 PetscInt nzs; 2563 2564 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2565 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2566 for (i=0;i<nzs-1;i++) { 2567 PetscScalar vals[2]; 2568 PetscInt cols[2]; 2569 2570 cols[0] = idxs[i]; 2571 cols[1] = idxs[nzs-1]; 2572 vals[0] = 1.; 2573 vals[1] = 1.; 2574 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2575 } 2576 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2577 for (i=0;i<nzs-1;i++) array[i] = -1.; 2578 array[nzs-1] = 1.; 2579 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2580 /* store local idxs for p0 */ 2581 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2582 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2583 ierr = PetscFree(array);CHKERRQ(ierr); 2584 } 2585 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2586 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2587 /* project if needed */ 2588 if (pcbddc->benign_change_explicit) { 2589 Mat M; 2590 2591 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2592 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2593 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2594 ierr = MatDestroy(&M);CHKERRQ(ierr); 2595 } 2596 /* store global idxs for p0 */ 2597 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2598 } 2599 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2600 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2601 2602 /* determines if the coarse solver will be singular or not */ 2603 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2604 /* determines if the problem has subdomains with 0 pressure block */ 2605 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2606 *zerodiaglocal = zerodiag; 2607 PetscFunctionReturn(0); 2608 } 2609 2610 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2611 { 2612 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2613 PetscScalar *array; 2614 PetscErrorCode ierr; 2615 2616 PetscFunctionBegin; 2617 if (!pcbddc->benign_sf) { 2618 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2619 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2620 } 2621 if (get) { 2622 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2623 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2624 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2625 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2626 } else { 2627 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2628 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2629 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2630 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2631 } 2632 PetscFunctionReturn(0); 2633 } 2634 2635 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2636 { 2637 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2638 PetscErrorCode ierr; 2639 2640 PetscFunctionBegin; 2641 /* TODO: add error checking 2642 - avoid nested pop (or push) calls. 2643 - cannot push before pop. 2644 - cannot call this if pcbddc->local_mat is NULL 2645 */ 2646 if (!pcbddc->benign_n) { 2647 PetscFunctionReturn(0); 2648 } 2649 if (pop) { 2650 if (pcbddc->benign_change_explicit) { 2651 IS is_p0; 2652 MatReuse reuse; 2653 2654 /* extract B_0 */ 2655 reuse = MAT_INITIAL_MATRIX; 2656 if (pcbddc->benign_B0) { 2657 reuse = MAT_REUSE_MATRIX; 2658 } 2659 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2660 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2661 /* remove rows and cols from local problem */ 2662 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2663 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2664 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2665 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2666 } else { 2667 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2668 PetscScalar *vals; 2669 PetscInt i,n,*idxs_ins; 2670 2671 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2672 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2673 if (!pcbddc->benign_B0) { 2674 PetscInt *nnz; 2675 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2676 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2677 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2678 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2679 for (i=0;i<pcbddc->benign_n;i++) { 2680 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2681 nnz[i] = n - nnz[i]; 2682 } 2683 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2684 ierr = PetscFree(nnz);CHKERRQ(ierr); 2685 } 2686 2687 for (i=0;i<pcbddc->benign_n;i++) { 2688 PetscScalar *array; 2689 PetscInt *idxs,j,nz,cum; 2690 2691 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2692 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2693 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2694 for (j=0;j<nz;j++) vals[j] = 1.; 2695 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2696 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2697 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2698 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2699 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2700 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2701 cum = 0; 2702 for (j=0;j<n;j++) { 2703 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2704 vals[cum] = array[j]; 2705 idxs_ins[cum] = j; 2706 cum++; 2707 } 2708 } 2709 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2710 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2711 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2712 } 2713 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2714 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2715 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2716 } 2717 } else { /* push */ 2718 if (pcbddc->benign_change_explicit) { 2719 PetscInt i; 2720 2721 for (i=0;i<pcbddc->benign_n;i++) { 2722 PetscScalar *B0_vals; 2723 PetscInt *B0_cols,B0_ncol; 2724 2725 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2726 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2727 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2728 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2729 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2730 } 2731 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2732 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2733 } else { 2734 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2735 } 2736 } 2737 PetscFunctionReturn(0); 2738 } 2739 2740 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2741 { 2742 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2743 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2744 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2745 PetscBLASInt *B_iwork,*B_ifail; 2746 PetscScalar *work,lwork; 2747 PetscScalar *St,*S,*eigv; 2748 PetscScalar *Sarray,*Starray; 2749 PetscReal *eigs,thresh; 2750 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2751 PetscBool allocated_S_St; 2752 #if defined(PETSC_USE_COMPLEX) 2753 PetscReal *rwork; 2754 #endif 2755 PetscErrorCode ierr; 2756 2757 PetscFunctionBegin; 2758 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2759 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2760 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\nRerun with -sub_schurs_hermitian 1 -sub_schurs_posdef 1 if the problem is SPD",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2761 2762 if (pcbddc->dbg_flag) { 2763 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2764 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2765 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2766 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2767 } 2768 2769 if (pcbddc->dbg_flag) { 2770 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2771 } 2772 2773 /* max size of subsets */ 2774 mss = 0; 2775 for (i=0;i<sub_schurs->n_subs;i++) { 2776 PetscInt subset_size; 2777 2778 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2779 mss = PetscMax(mss,subset_size); 2780 } 2781 2782 /* min/max and threshold */ 2783 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2784 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2785 nmax = PetscMax(nmin,nmax); 2786 allocated_S_St = PETSC_FALSE; 2787 if (nmin) { 2788 allocated_S_St = PETSC_TRUE; 2789 } 2790 2791 /* allocate lapack workspace */ 2792 cum = cum2 = 0; 2793 maxneigs = 0; 2794 for (i=0;i<sub_schurs->n_subs;i++) { 2795 PetscInt n,subset_size; 2796 2797 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2798 n = PetscMin(subset_size,nmax); 2799 cum += subset_size; 2800 cum2 += subset_size*n; 2801 maxneigs = PetscMax(maxneigs,n); 2802 } 2803 if (mss) { 2804 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2805 PetscBLASInt B_itype = 1; 2806 PetscBLASInt B_N = mss; 2807 PetscReal zero = 0.0; 2808 PetscReal eps = 0.0; /* dlamch? */ 2809 2810 B_lwork = -1; 2811 S = NULL; 2812 St = NULL; 2813 eigs = NULL; 2814 eigv = NULL; 2815 B_iwork = NULL; 2816 B_ifail = NULL; 2817 #if defined(PETSC_USE_COMPLEX) 2818 rwork = NULL; 2819 #endif 2820 thresh = 1.0; 2821 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2822 #if defined(PETSC_USE_COMPLEX) 2823 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2824 #else 2825 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2826 #endif 2827 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2828 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2829 } else { 2830 /* TODO */ 2831 } 2832 } else { 2833 lwork = 0; 2834 } 2835 2836 nv = 0; 2837 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2838 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2839 } 2840 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2841 if (allocated_S_St) { 2842 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2843 } 2844 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2845 #if defined(PETSC_USE_COMPLEX) 2846 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2847 #endif 2848 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2849 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2850 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2851 nv+cum,&pcbddc->adaptive_constraints_idxs, 2852 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2853 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2854 2855 maxneigs = 0; 2856 cum = cumarray = 0; 2857 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2858 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2859 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2860 const PetscInt *idxs; 2861 2862 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2863 for (cum=0;cum<nv;cum++) { 2864 pcbddc->adaptive_constraints_n[cum] = 1; 2865 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2866 pcbddc->adaptive_constraints_data[cum] = 1.0; 2867 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2868 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2869 } 2870 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2871 } 2872 2873 if (mss) { /* multilevel */ 2874 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2875 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2876 } 2877 2878 thresh = pcbddc->adaptive_threshold; 2879 for (i=0;i<sub_schurs->n_subs;i++) { 2880 const PetscInt *idxs; 2881 PetscReal upper,lower; 2882 PetscInt j,subset_size,eigs_start = 0; 2883 PetscBLASInt B_N; 2884 PetscBool same_data = PETSC_FALSE; 2885 2886 if (pcbddc->use_deluxe_scaling) { 2887 upper = PETSC_MAX_REAL; 2888 lower = thresh; 2889 } else { 2890 upper = 1./thresh; 2891 lower = 0.; 2892 } 2893 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2894 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2895 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2896 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2897 if (sub_schurs->is_hermitian) { 2898 PetscInt j,k; 2899 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2900 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2901 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2902 } 2903 for (j=0;j<subset_size;j++) { 2904 for (k=j;k<subset_size;k++) { 2905 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2906 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2907 } 2908 } 2909 } else { 2910 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2911 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2912 } 2913 } else { 2914 S = Sarray + cumarray; 2915 St = Starray + cumarray; 2916 } 2917 /* see if we can save some work */ 2918 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2919 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2920 } 2921 2922 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2923 B_neigs = 0; 2924 } else { 2925 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2926 PetscBLASInt B_itype = 1; 2927 PetscBLASInt B_IL, B_IU; 2928 PetscReal eps = -1.0; /* dlamch? */ 2929 PetscInt nmin_s; 2930 PetscBool compute_range = PETSC_FALSE; 2931 2932 if (pcbddc->dbg_flag) { 2933 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 2934 } 2935 2936 compute_range = PETSC_FALSE; 2937 if (thresh > 1.+PETSC_SMALL && !same_data) { 2938 compute_range = PETSC_TRUE; 2939 } 2940 2941 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2942 if (compute_range) { 2943 2944 /* ask for eigenvalues larger than thresh */ 2945 #if defined(PETSC_USE_COMPLEX) 2946 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2947 #else 2948 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2949 #endif 2950 } else if (!same_data) { 2951 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2952 B_IL = 1; 2953 #if defined(PETSC_USE_COMPLEX) 2954 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2955 #else 2956 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2957 #endif 2958 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 2959 PetscInt k; 2960 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2961 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2962 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2963 nmin = nmax; 2964 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2965 for (k=0;k<nmax;k++) { 2966 eigs[k] = 1./PETSC_SMALL; 2967 eigv[k*(subset_size+1)] = 1.0; 2968 } 2969 } 2970 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2971 if (B_ierr) { 2972 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2973 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2974 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2975 } 2976 2977 if (B_neigs > nmax) { 2978 if (pcbddc->dbg_flag) { 2979 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2980 } 2981 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2982 B_neigs = nmax; 2983 } 2984 2985 nmin_s = PetscMin(nmin,B_N); 2986 if (B_neigs < nmin_s) { 2987 PetscBLASInt B_neigs2; 2988 2989 if (pcbddc->use_deluxe_scaling) { 2990 B_IL = B_N - nmin_s + 1; 2991 B_IU = B_N - B_neigs; 2992 } else { 2993 B_IL = B_neigs + 1; 2994 B_IU = nmin_s; 2995 } 2996 if (pcbddc->dbg_flag) { 2997 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2998 } 2999 if (sub_schurs->is_hermitian) { 3000 PetscInt j,k; 3001 for (j=0;j<subset_size;j++) { 3002 for (k=j;k<subset_size;k++) { 3003 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3004 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3005 } 3006 } 3007 } else { 3008 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3009 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3010 } 3011 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3012 #if defined(PETSC_USE_COMPLEX) 3013 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3014 #else 3015 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3016 #endif 3017 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3018 B_neigs += B_neigs2; 3019 } 3020 if (B_ierr) { 3021 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3022 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3023 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3024 } 3025 if (pcbddc->dbg_flag) { 3026 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3027 for (j=0;j<B_neigs;j++) { 3028 if (eigs[j] == 0.0) { 3029 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3030 } else { 3031 if (pcbddc->use_deluxe_scaling) { 3032 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3033 } else { 3034 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3035 } 3036 } 3037 } 3038 } 3039 } else { 3040 /* TODO */ 3041 } 3042 } 3043 /* change the basis back to the original one */ 3044 if (sub_schurs->change) { 3045 Mat change,phi,phit; 3046 3047 if (pcbddc->dbg_flag > 1) { 3048 PetscInt ii; 3049 for (ii=0;ii<B_neigs;ii++) { 3050 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3051 for (j=0;j<B_N;j++) { 3052 #if defined(PETSC_USE_COMPLEX) 3053 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3054 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3055 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3056 #else 3057 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3058 #endif 3059 } 3060 } 3061 } 3062 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3063 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3064 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3065 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3066 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3067 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3068 } 3069 maxneigs = PetscMax(B_neigs,maxneigs); 3070 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3071 if (B_neigs) { 3072 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3073 3074 if (pcbddc->dbg_flag > 1) { 3075 PetscInt ii; 3076 for (ii=0;ii<B_neigs;ii++) { 3077 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3078 for (j=0;j<B_N;j++) { 3079 #if defined(PETSC_USE_COMPLEX) 3080 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3081 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3082 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3083 #else 3084 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3085 #endif 3086 } 3087 } 3088 } 3089 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3090 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3091 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3092 cum++; 3093 } 3094 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3095 /* shift for next computation */ 3096 cumarray += subset_size*subset_size; 3097 } 3098 if (pcbddc->dbg_flag) { 3099 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3100 } 3101 3102 if (mss) { 3103 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3104 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3105 /* destroy matrices (junk) */ 3106 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3107 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3108 } 3109 if (allocated_S_St) { 3110 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3111 } 3112 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3113 #if defined(PETSC_USE_COMPLEX) 3114 ierr = PetscFree(rwork);CHKERRQ(ierr); 3115 #endif 3116 if (pcbddc->dbg_flag) { 3117 PetscInt maxneigs_r; 3118 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3119 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3120 } 3121 PetscFunctionReturn(0); 3122 } 3123 3124 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3125 { 3126 PetscScalar *coarse_submat_vals; 3127 PetscErrorCode ierr; 3128 3129 PetscFunctionBegin; 3130 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3131 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3132 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3133 3134 /* Setup local neumann solver ksp_R */ 3135 /* PCBDDCSetUpLocalScatters should be called first! */ 3136 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3137 3138 /* 3139 Setup local correction and local part of coarse basis. 3140 Gives back the dense local part of the coarse matrix in column major ordering 3141 */ 3142 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3143 3144 /* Compute total number of coarse nodes and setup coarse solver */ 3145 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3146 3147 /* free */ 3148 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3149 PetscFunctionReturn(0); 3150 } 3151 3152 PetscErrorCode PCBDDCResetCustomization(PC pc) 3153 { 3154 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3155 PetscErrorCode ierr; 3156 3157 PetscFunctionBegin; 3158 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3159 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3160 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3161 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3162 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3163 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3164 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3165 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3166 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3167 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3168 PetscFunctionReturn(0); 3169 } 3170 3171 PetscErrorCode PCBDDCResetTopography(PC pc) 3172 { 3173 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3174 PetscInt i; 3175 PetscErrorCode ierr; 3176 3177 PetscFunctionBegin; 3178 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3179 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3180 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3181 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3182 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3183 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3184 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3185 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3186 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3187 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3188 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3189 for (i=0;i<pcbddc->n_local_subs;i++) { 3190 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3191 } 3192 pcbddc->n_local_subs = 0; 3193 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3194 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3195 pcbddc->graphanalyzed = PETSC_FALSE; 3196 pcbddc->recompute_topography = PETSC_TRUE; 3197 PetscFunctionReturn(0); 3198 } 3199 3200 PetscErrorCode PCBDDCResetSolvers(PC pc) 3201 { 3202 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3203 PetscErrorCode ierr; 3204 3205 PetscFunctionBegin; 3206 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3207 if (pcbddc->coarse_phi_B) { 3208 PetscScalar *array; 3209 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3210 ierr = PetscFree(array);CHKERRQ(ierr); 3211 } 3212 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3213 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3214 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3215 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3216 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3217 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3218 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3219 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3220 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3221 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3222 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3223 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3224 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3225 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3226 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3227 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3228 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3229 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3230 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3231 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3232 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3233 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3234 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3235 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3236 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3237 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3238 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3239 if (pcbddc->benign_zerodiag_subs) { 3240 PetscInt i; 3241 for (i=0;i<pcbddc->benign_n;i++) { 3242 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3243 } 3244 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3245 } 3246 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3247 PetscFunctionReturn(0); 3248 } 3249 3250 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3251 { 3252 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3253 PC_IS *pcis = (PC_IS*)pc->data; 3254 VecType impVecType; 3255 PetscInt n_constraints,n_R,old_size; 3256 PetscErrorCode ierr; 3257 3258 PetscFunctionBegin; 3259 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3260 n_R = pcis->n - pcbddc->n_vertices; 3261 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3262 /* local work vectors (try to avoid unneeded work)*/ 3263 /* R nodes */ 3264 old_size = -1; 3265 if (pcbddc->vec1_R) { 3266 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3267 } 3268 if (n_R != old_size) { 3269 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3270 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3271 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3272 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3273 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3274 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3275 } 3276 /* local primal dofs */ 3277 old_size = -1; 3278 if (pcbddc->vec1_P) { 3279 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3280 } 3281 if (pcbddc->local_primal_size != old_size) { 3282 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3283 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3284 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3285 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3286 } 3287 /* local explicit constraints */ 3288 old_size = -1; 3289 if (pcbddc->vec1_C) { 3290 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3291 } 3292 if (n_constraints && n_constraints != old_size) { 3293 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3294 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3295 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3296 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3297 } 3298 PetscFunctionReturn(0); 3299 } 3300 3301 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3302 { 3303 PetscErrorCode ierr; 3304 /* pointers to pcis and pcbddc */ 3305 PC_IS* pcis = (PC_IS*)pc->data; 3306 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3307 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3308 /* submatrices of local problem */ 3309 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3310 /* submatrices of local coarse problem */ 3311 Mat S_VV,S_CV,S_VC,S_CC; 3312 /* working matrices */ 3313 Mat C_CR; 3314 /* additional working stuff */ 3315 PC pc_R; 3316 Mat F,Brhs = NULL; 3317 Vec dummy_vec; 3318 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3319 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3320 PetscScalar *work; 3321 PetscInt *idx_V_B; 3322 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3323 PetscInt i,n_R,n_D,n_B; 3324 3325 /* some shortcuts to scalars */ 3326 PetscScalar one=1.0,m_one=-1.0; 3327 3328 PetscFunctionBegin; 3329 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3330 3331 /* Set Non-overlapping dimensions */ 3332 n_vertices = pcbddc->n_vertices; 3333 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3334 n_B = pcis->n_B; 3335 n_D = pcis->n - n_B; 3336 n_R = pcis->n - n_vertices; 3337 3338 /* vertices in boundary numbering */ 3339 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3340 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3341 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3342 3343 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3344 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3345 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3346 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3347 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3348 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3349 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3350 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3351 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3352 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3353 3354 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3355 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3356 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3357 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3358 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3359 lda_rhs = n_R; 3360 need_benign_correction = PETSC_FALSE; 3361 if (isLU || isILU || isCHOL) { 3362 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3363 } else if (sub_schurs && sub_schurs->reuse_solver) { 3364 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3365 MatFactorType type; 3366 3367 F = reuse_solver->F; 3368 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3369 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3370 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3371 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3372 } else { 3373 F = NULL; 3374 } 3375 3376 /* determine if we can use a sparse right-hand side */ 3377 sparserhs = PETSC_FALSE; 3378 if (F) { 3379 const MatSolverPackage solver; 3380 3381 ierr = MatFactorGetSolverPackage(F,&solver);CHKERRQ(ierr); 3382 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3383 } 3384 3385 /* allocate workspace */ 3386 n = 0; 3387 if (n_constraints) { 3388 n += lda_rhs*n_constraints; 3389 } 3390 if (n_vertices) { 3391 n = PetscMax(2*lda_rhs*n_vertices,n); 3392 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3393 } 3394 if (!pcbddc->symmetric_primal) { 3395 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3396 } 3397 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3398 3399 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3400 dummy_vec = NULL; 3401 if (need_benign_correction && lda_rhs != n_R && F) { 3402 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3403 } 3404 3405 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3406 if (n_constraints) { 3407 Mat M1,M2,M3,C_B; 3408 IS is_aux; 3409 PetscScalar *array,*array2; 3410 3411 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3412 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3413 3414 /* Extract constraints on R nodes: C_{CR} */ 3415 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3416 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3417 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3418 3419 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3420 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3421 if (!sparserhs) { 3422 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3423 for (i=0;i<n_constraints;i++) { 3424 const PetscScalar *row_cmat_values; 3425 const PetscInt *row_cmat_indices; 3426 PetscInt size_of_constraint,j; 3427 3428 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3429 for (j=0;j<size_of_constraint;j++) { 3430 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3431 } 3432 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3433 } 3434 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3435 } else { 3436 Mat tC_CR; 3437 3438 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3439 if (lda_rhs != n_R) { 3440 PetscScalar *aa; 3441 PetscInt r,*ii,*jj; 3442 PetscBool done; 3443 3444 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3445 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");CHKERRQ(ierr); 3446 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3447 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3448 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3449 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");CHKERRQ(ierr); 3450 } else { 3451 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3452 tC_CR = C_CR; 3453 } 3454 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3455 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3456 } 3457 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3458 if (F) { 3459 if (need_benign_correction) { 3460 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3461 3462 /* rhs is already zero on interior dofs, no need to change the rhs */ 3463 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3464 } 3465 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3466 if (need_benign_correction) { 3467 PetscScalar *marr; 3468 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3469 3470 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3471 if (lda_rhs != n_R) { 3472 for (i=0;i<n_constraints;i++) { 3473 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3474 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3475 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3476 } 3477 } else { 3478 for (i=0;i<n_constraints;i++) { 3479 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3480 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3481 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3482 } 3483 } 3484 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3485 } 3486 } else { 3487 PetscScalar *marr; 3488 3489 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3490 for (i=0;i<n_constraints;i++) { 3491 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3492 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3493 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3494 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3495 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3496 } 3497 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3498 } 3499 if (sparserhs) { 3500 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3501 } 3502 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3503 if (!pcbddc->switch_static) { 3504 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3505 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3506 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3507 for (i=0;i<n_constraints;i++) { 3508 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3509 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3510 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3511 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3512 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3513 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3514 } 3515 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3516 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3517 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3518 } else { 3519 if (lda_rhs != n_R) { 3520 IS dummy; 3521 3522 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3523 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3524 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3525 } else { 3526 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3527 pcbddc->local_auxmat2 = local_auxmat2_R; 3528 } 3529 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3530 } 3531 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3532 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3533 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3534 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3535 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3536 if (isCHOL) { 3537 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3538 } else { 3539 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3540 } 3541 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3542 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3543 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3544 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3545 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3546 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3547 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3548 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3549 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3550 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3551 } 3552 3553 /* Get submatrices from subdomain matrix */ 3554 if (n_vertices) { 3555 IS is_aux; 3556 PetscBool isseqaij; 3557 3558 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3559 IS tis; 3560 3561 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3562 ierr = ISSort(tis);CHKERRQ(ierr); 3563 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3564 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3565 } else { 3566 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3567 } 3568 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3569 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3570 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3571 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 3572 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3573 } 3574 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3575 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3576 } 3577 3578 /* Matrix of coarse basis functions (local) */ 3579 if (pcbddc->coarse_phi_B) { 3580 PetscInt on_B,on_primal,on_D=n_D; 3581 if (pcbddc->coarse_phi_D) { 3582 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3583 } 3584 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3585 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3586 PetscScalar *marray; 3587 3588 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3589 ierr = PetscFree(marray);CHKERRQ(ierr); 3590 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3591 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3592 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3593 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3594 } 3595 } 3596 3597 if (!pcbddc->coarse_phi_B) { 3598 PetscScalar *marr; 3599 3600 /* memory size */ 3601 n = n_B*pcbddc->local_primal_size; 3602 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 3603 if (!pcbddc->symmetric_primal) n *= 2; 3604 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 3605 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3606 marr += n_B*pcbddc->local_primal_size; 3607 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3608 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3609 marr += n_D*pcbddc->local_primal_size; 3610 } 3611 if (!pcbddc->symmetric_primal) { 3612 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3613 marr += n_B*pcbddc->local_primal_size; 3614 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3615 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3616 } 3617 } else { 3618 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3619 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3620 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3621 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3622 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3623 } 3624 } 3625 } 3626 3627 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3628 p0_lidx_I = NULL; 3629 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3630 const PetscInt *idxs; 3631 3632 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3633 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3634 for (i=0;i<pcbddc->benign_n;i++) { 3635 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3636 } 3637 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3638 } 3639 3640 /* vertices */ 3641 if (n_vertices) { 3642 PetscBool restoreavr = PETSC_FALSE; 3643 3644 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3645 3646 if (n_R) { 3647 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3648 PetscBLASInt B_N,B_one = 1; 3649 PetscScalar *x,*y; 3650 3651 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3652 if (need_benign_correction) { 3653 ISLocalToGlobalMapping RtoN; 3654 IS is_p0; 3655 PetscInt *idxs_p0,n; 3656 3657 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3658 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3659 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3660 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3661 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3662 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3663 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3664 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3665 } 3666 3667 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3668 if (!sparserhs || need_benign_correction) { 3669 if (lda_rhs == n_R) { 3670 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3671 } else { 3672 PetscScalar *av,*array; 3673 const PetscInt *xadj,*adjncy; 3674 PetscInt n; 3675 PetscBool flg_row; 3676 3677 array = work+lda_rhs*n_vertices; 3678 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3679 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3680 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3681 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3682 for (i=0;i<n;i++) { 3683 PetscInt j; 3684 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3685 } 3686 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3687 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3688 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3689 } 3690 if (need_benign_correction) { 3691 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3692 PetscScalar *marr; 3693 3694 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3695 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3696 3697 | 0 0 0 | (V) 3698 L = | 0 0 -1 | (P-p0) 3699 | 0 0 -1 | (p0) 3700 3701 */ 3702 for (i=0;i<reuse_solver->benign_n;i++) { 3703 const PetscScalar *vals; 3704 const PetscInt *idxs,*idxs_zero; 3705 PetscInt n,j,nz; 3706 3707 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3708 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3709 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3710 for (j=0;j<n;j++) { 3711 PetscScalar val = vals[j]; 3712 PetscInt k,col = idxs[j]; 3713 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3714 } 3715 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3716 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3717 } 3718 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3719 } 3720 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 3721 Brhs = A_RV; 3722 } else { 3723 Mat tA_RVT,A_RVT; 3724 3725 if (!pcbddc->symmetric_primal) { 3726 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 3727 } else { 3728 restoreavr = PETSC_TRUE; 3729 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 3730 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 3731 A_RVT = A_VR; 3732 } 3733 if (lda_rhs != n_R) { 3734 PetscScalar *aa; 3735 PetscInt r,*ii,*jj; 3736 PetscBool done; 3737 3738 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3739 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");CHKERRQ(ierr); 3740 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 3741 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 3742 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3743 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");CHKERRQ(ierr); 3744 } else { 3745 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 3746 tA_RVT = A_RVT; 3747 } 3748 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 3749 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 3750 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 3751 } 3752 if (F) { 3753 /* need to correct the rhs */ 3754 if (need_benign_correction) { 3755 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3756 PetscScalar *marr; 3757 3758 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 3759 if (lda_rhs != n_R) { 3760 for (i=0;i<n_vertices;i++) { 3761 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3762 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3763 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3764 } 3765 } else { 3766 for (i=0;i<n_vertices;i++) { 3767 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3768 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3769 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3770 } 3771 } 3772 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 3773 } 3774 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 3775 if (restoreavr) { 3776 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 3777 } 3778 /* need to correct the solution */ 3779 if (need_benign_correction) { 3780 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3781 PetscScalar *marr; 3782 3783 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3784 if (lda_rhs != n_R) { 3785 for (i=0;i<n_vertices;i++) { 3786 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3787 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3788 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3789 } 3790 } else { 3791 for (i=0;i<n_vertices;i++) { 3792 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3793 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3794 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3795 } 3796 } 3797 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3798 } 3799 } else { 3800 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 3801 for (i=0;i<n_vertices;i++) { 3802 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3803 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3804 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3805 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3806 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3807 } 3808 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 3809 } 3810 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3811 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3812 /* S_VV and S_CV */ 3813 if (n_constraints) { 3814 Mat B; 3815 3816 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3817 for (i=0;i<n_vertices;i++) { 3818 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3819 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3820 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3821 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3822 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3823 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3824 } 3825 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3826 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3827 ierr = MatDestroy(&B);CHKERRQ(ierr); 3828 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3829 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3830 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3831 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3832 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3833 ierr = MatDestroy(&B);CHKERRQ(ierr); 3834 } 3835 if (lda_rhs != n_R) { 3836 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3837 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3838 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3839 } 3840 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3841 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3842 if (need_benign_correction) { 3843 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3844 PetscScalar *marr,*sums; 3845 3846 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3847 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3848 for (i=0;i<reuse_solver->benign_n;i++) { 3849 const PetscScalar *vals; 3850 const PetscInt *idxs,*idxs_zero; 3851 PetscInt n,j,nz; 3852 3853 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3854 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3855 for (j=0;j<n_vertices;j++) { 3856 PetscInt k; 3857 sums[j] = 0.; 3858 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3859 } 3860 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3861 for (j=0;j<n;j++) { 3862 PetscScalar val = vals[j]; 3863 PetscInt k; 3864 for (k=0;k<n_vertices;k++) { 3865 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3866 } 3867 } 3868 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3869 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3870 } 3871 ierr = PetscFree(sums);CHKERRQ(ierr); 3872 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3873 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3874 } 3875 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3876 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3877 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3878 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3879 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3880 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3881 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3882 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3883 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3884 } else { 3885 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3886 } 3887 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3888 3889 /* coarse basis functions */ 3890 for (i=0;i<n_vertices;i++) { 3891 PetscScalar *y; 3892 3893 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3894 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3895 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3896 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3897 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3898 y[n_B*i+idx_V_B[i]] = 1.0; 3899 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3900 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3901 3902 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3903 PetscInt j; 3904 3905 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3906 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3907 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3908 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3909 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3910 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3911 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3912 } 3913 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3914 } 3915 /* if n_R == 0 the object is not destroyed */ 3916 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3917 } 3918 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3919 3920 if (n_constraints) { 3921 Mat B; 3922 3923 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3924 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3925 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3926 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3927 if (n_vertices) { 3928 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3929 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3930 } else { 3931 Mat S_VCt; 3932 3933 if (lda_rhs != n_R) { 3934 ierr = MatDestroy(&B);CHKERRQ(ierr); 3935 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3936 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3937 } 3938 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3939 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3940 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3941 } 3942 } 3943 ierr = MatDestroy(&B);CHKERRQ(ierr); 3944 /* coarse basis functions */ 3945 for (i=0;i<n_constraints;i++) { 3946 PetscScalar *y; 3947 3948 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3949 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3950 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3951 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3952 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3953 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3954 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3955 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3956 PetscInt j; 3957 3958 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3959 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3960 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3961 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3962 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3963 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3964 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3965 } 3966 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3967 } 3968 } 3969 if (n_constraints) { 3970 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3971 } 3972 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3973 3974 /* coarse matrix entries relative to B_0 */ 3975 if (pcbddc->benign_n) { 3976 Mat B0_B,B0_BPHI; 3977 IS is_dummy; 3978 PetscScalar *data; 3979 PetscInt j; 3980 3981 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3982 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3983 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3984 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3985 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3986 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3987 for (j=0;j<pcbddc->benign_n;j++) { 3988 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3989 for (i=0;i<pcbddc->local_primal_size;i++) { 3990 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3991 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3992 } 3993 } 3994 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3995 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3996 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3997 } 3998 3999 /* compute other basis functions for non-symmetric problems */ 4000 if (!pcbddc->symmetric_primal) { 4001 Mat B_V=NULL,B_C=NULL; 4002 PetscScalar *marray; 4003 4004 if (n_constraints) { 4005 Mat S_CCT,C_CRT; 4006 4007 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4008 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4009 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4010 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4011 if (n_vertices) { 4012 Mat S_VCT; 4013 4014 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4015 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4016 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4017 } 4018 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4019 } else { 4020 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4021 } 4022 if (n_vertices && n_R) { 4023 PetscScalar *av,*marray; 4024 const PetscInt *xadj,*adjncy; 4025 PetscInt n; 4026 PetscBool flg_row; 4027 4028 /* B_V = B_V - A_VR^T */ 4029 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4030 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4031 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4032 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4033 for (i=0;i<n;i++) { 4034 PetscInt j; 4035 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4036 } 4037 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4038 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4039 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4040 } 4041 4042 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4043 if (n_vertices) { 4044 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4045 for (i=0;i<n_vertices;i++) { 4046 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4047 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4048 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4049 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4050 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4051 } 4052 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4053 } 4054 if (B_C) { 4055 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4056 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4057 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4058 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4059 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4060 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4061 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4062 } 4063 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4064 } 4065 /* coarse basis functions */ 4066 for (i=0;i<pcbddc->local_primal_size;i++) { 4067 PetscScalar *y; 4068 4069 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4070 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4071 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4072 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4073 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4074 if (i<n_vertices) { 4075 y[n_B*i+idx_V_B[i]] = 1.0; 4076 } 4077 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4078 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4079 4080 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4081 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4082 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4083 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4084 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4085 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4086 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4087 } 4088 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4089 } 4090 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4091 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4092 } 4093 4094 /* free memory */ 4095 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4096 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4097 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4098 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4099 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4100 ierr = PetscFree(work);CHKERRQ(ierr); 4101 if (n_vertices) { 4102 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4103 } 4104 if (n_constraints) { 4105 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4106 } 4107 /* Checking coarse_sub_mat and coarse basis functios */ 4108 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4109 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4110 if (pcbddc->dbg_flag) { 4111 Mat coarse_sub_mat; 4112 Mat AUXMAT,TM1,TM2,TM3,TM4; 4113 Mat coarse_phi_D,coarse_phi_B; 4114 Mat coarse_psi_D,coarse_psi_B; 4115 Mat A_II,A_BB,A_IB,A_BI; 4116 Mat C_B,CPHI; 4117 IS is_dummy; 4118 Vec mones; 4119 MatType checkmattype=MATSEQAIJ; 4120 PetscReal real_value; 4121 4122 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4123 Mat A; 4124 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4125 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4126 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4127 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4128 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4129 ierr = MatDestroy(&A);CHKERRQ(ierr); 4130 } else { 4131 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4132 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4133 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4134 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4135 } 4136 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4137 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4138 if (!pcbddc->symmetric_primal) { 4139 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4140 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4141 } 4142 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4143 4144 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4145 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4146 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4147 if (!pcbddc->symmetric_primal) { 4148 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4149 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4150 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4151 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4152 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4153 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4154 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4155 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4156 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4157 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4158 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4159 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4160 } else { 4161 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4162 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4163 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4164 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4165 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4166 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4167 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4168 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4169 } 4170 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4171 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4172 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4173 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4174 if (pcbddc->benign_n) { 4175 Mat B0_B,B0_BPHI; 4176 PetscScalar *data,*data2; 4177 PetscInt j; 4178 4179 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4180 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4181 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4182 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4183 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4184 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4185 for (j=0;j<pcbddc->benign_n;j++) { 4186 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4187 for (i=0;i<pcbddc->local_primal_size;i++) { 4188 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4189 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4190 } 4191 } 4192 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4193 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4194 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4195 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4196 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4197 } 4198 #if 0 4199 { 4200 PetscViewer viewer; 4201 char filename[256]; 4202 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4203 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4204 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4205 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4206 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4207 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4208 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4209 if (save_change) { 4210 Mat phi_B; 4211 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 4212 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 4213 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 4214 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 4215 } else { 4216 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4217 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4218 } 4219 if (pcbddc->coarse_phi_D) { 4220 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4221 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4222 } 4223 if (pcbddc->coarse_psi_B) { 4224 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4225 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4226 } 4227 if (pcbddc->coarse_psi_D) { 4228 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4229 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4230 } 4231 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4232 } 4233 #endif 4234 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4235 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4236 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4237 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4238 4239 /* check constraints */ 4240 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4241 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4242 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4243 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4244 } else { 4245 PetscScalar *data; 4246 Mat tmat; 4247 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4248 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4249 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4250 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4251 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4252 } 4253 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4254 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4255 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4256 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4257 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4258 if (!pcbddc->symmetric_primal) { 4259 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4260 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4261 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4262 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4263 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4264 } 4265 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4266 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4267 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4268 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4269 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4270 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4271 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4272 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4273 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4274 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4275 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4276 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4277 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4278 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4279 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4280 if (!pcbddc->symmetric_primal) { 4281 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4282 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4283 } 4284 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4285 } 4286 /* get back data */ 4287 *coarse_submat_vals_n = coarse_submat_vals; 4288 PetscFunctionReturn(0); 4289 } 4290 4291 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4292 { 4293 Mat *work_mat; 4294 IS isrow_s,iscol_s; 4295 PetscBool rsorted,csorted; 4296 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4297 PetscErrorCode ierr; 4298 4299 PetscFunctionBegin; 4300 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4301 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4302 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4303 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4304 4305 if (!rsorted) { 4306 const PetscInt *idxs; 4307 PetscInt *idxs_sorted,i; 4308 4309 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4310 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4311 for (i=0;i<rsize;i++) { 4312 idxs_perm_r[i] = i; 4313 } 4314 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4315 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4316 for (i=0;i<rsize;i++) { 4317 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4318 } 4319 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4320 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4321 } else { 4322 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4323 isrow_s = isrow; 4324 } 4325 4326 if (!csorted) { 4327 if (isrow == iscol) { 4328 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4329 iscol_s = isrow_s; 4330 } else { 4331 const PetscInt *idxs; 4332 PetscInt *idxs_sorted,i; 4333 4334 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4335 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4336 for (i=0;i<csize;i++) { 4337 idxs_perm_c[i] = i; 4338 } 4339 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4340 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4341 for (i=0;i<csize;i++) { 4342 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4343 } 4344 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4345 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4346 } 4347 } else { 4348 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4349 iscol_s = iscol; 4350 } 4351 4352 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4353 4354 if (!rsorted || !csorted) { 4355 Mat new_mat; 4356 IS is_perm_r,is_perm_c; 4357 4358 if (!rsorted) { 4359 PetscInt *idxs_r,i; 4360 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4361 for (i=0;i<rsize;i++) { 4362 idxs_r[idxs_perm_r[i]] = i; 4363 } 4364 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4365 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4366 } else { 4367 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4368 } 4369 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4370 4371 if (!csorted) { 4372 if (isrow_s == iscol_s) { 4373 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4374 is_perm_c = is_perm_r; 4375 } else { 4376 PetscInt *idxs_c,i; 4377 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4378 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4379 for (i=0;i<csize;i++) { 4380 idxs_c[idxs_perm_c[i]] = i; 4381 } 4382 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4383 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4384 } 4385 } else { 4386 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4387 } 4388 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4389 4390 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4391 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4392 work_mat[0] = new_mat; 4393 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4394 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4395 } 4396 4397 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4398 *B = work_mat[0]; 4399 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4400 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4401 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4402 PetscFunctionReturn(0); 4403 } 4404 4405 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4406 { 4407 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4408 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4409 Mat new_mat,lA; 4410 IS is_local,is_global; 4411 PetscInt local_size; 4412 PetscBool isseqaij; 4413 PetscErrorCode ierr; 4414 4415 PetscFunctionBegin; 4416 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4417 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4418 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4419 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4420 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4421 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4422 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4423 4424 /* check */ 4425 if (pcbddc->dbg_flag) { 4426 Vec x,x_change; 4427 PetscReal error; 4428 4429 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4430 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4431 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4432 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4433 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4434 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4435 if (!pcbddc->change_interior) { 4436 const PetscScalar *x,*y,*v; 4437 PetscReal lerror = 0.; 4438 PetscInt i; 4439 4440 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4441 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4442 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4443 for (i=0;i<local_size;i++) 4444 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4445 lerror = PetscAbsScalar(x[i]-y[i]); 4446 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4447 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4448 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4449 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4450 if (error > PETSC_SMALL) { 4451 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4452 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4453 } else { 4454 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4455 } 4456 } 4457 } 4458 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4459 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4460 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4461 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4462 if (error > PETSC_SMALL) { 4463 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4464 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4465 } else { 4466 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4467 } 4468 } 4469 ierr = VecDestroy(&x);CHKERRQ(ierr); 4470 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4471 } 4472 4473 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4474 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4475 4476 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4477 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4478 if (isseqaij) { 4479 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4480 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4481 if (lA) { 4482 Mat work; 4483 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4484 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4485 ierr = MatDestroy(&work);CHKERRQ(ierr); 4486 } 4487 } else { 4488 Mat work_mat; 4489 4490 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4491 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4492 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4493 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4494 if (lA) { 4495 Mat work; 4496 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4497 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4498 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4499 ierr = MatDestroy(&work);CHKERRQ(ierr); 4500 } 4501 } 4502 if (matis->A->symmetric_set) { 4503 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4504 #if !defined(PETSC_USE_COMPLEX) 4505 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4506 #endif 4507 } 4508 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4509 PetscFunctionReturn(0); 4510 } 4511 4512 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4513 { 4514 PC_IS* pcis = (PC_IS*)(pc->data); 4515 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4516 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4517 PetscInt *idx_R_local=NULL; 4518 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4519 PetscInt vbs,bs; 4520 PetscBT bitmask=NULL; 4521 PetscErrorCode ierr; 4522 4523 PetscFunctionBegin; 4524 /* 4525 No need to setup local scatters if 4526 - primal space is unchanged 4527 AND 4528 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4529 AND 4530 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4531 */ 4532 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4533 PetscFunctionReturn(0); 4534 } 4535 /* destroy old objects */ 4536 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4537 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4538 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4539 /* Set Non-overlapping dimensions */ 4540 n_B = pcis->n_B; 4541 n_D = pcis->n - n_B; 4542 n_vertices = pcbddc->n_vertices; 4543 4544 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4545 4546 /* create auxiliary bitmask and allocate workspace */ 4547 if (!sub_schurs || !sub_schurs->reuse_solver) { 4548 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4549 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4550 for (i=0;i<n_vertices;i++) { 4551 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4552 } 4553 4554 for (i=0, n_R=0; i<pcis->n; i++) { 4555 if (!PetscBTLookup(bitmask,i)) { 4556 idx_R_local[n_R++] = i; 4557 } 4558 } 4559 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4560 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4561 4562 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4563 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4564 } 4565 4566 /* Block code */ 4567 vbs = 1; 4568 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4569 if (bs>1 && !(n_vertices%bs)) { 4570 PetscBool is_blocked = PETSC_TRUE; 4571 PetscInt *vary; 4572 if (!sub_schurs || !sub_schurs->reuse_solver) { 4573 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4574 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4575 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4576 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4577 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4578 for (i=0; i<pcis->n/bs; i++) { 4579 if (vary[i]!=0 && vary[i]!=bs) { 4580 is_blocked = PETSC_FALSE; 4581 break; 4582 } 4583 } 4584 ierr = PetscFree(vary);CHKERRQ(ierr); 4585 } else { 4586 /* Verify directly the R set */ 4587 for (i=0; i<n_R/bs; i++) { 4588 PetscInt j,node=idx_R_local[bs*i]; 4589 for (j=1; j<bs; j++) { 4590 if (node != idx_R_local[bs*i+j]-j) { 4591 is_blocked = PETSC_FALSE; 4592 break; 4593 } 4594 } 4595 } 4596 } 4597 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4598 vbs = bs; 4599 for (i=0;i<n_R/vbs;i++) { 4600 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4601 } 4602 } 4603 } 4604 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4605 if (sub_schurs && sub_schurs->reuse_solver) { 4606 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4607 4608 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4609 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4610 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4611 reuse_solver->is_R = pcbddc->is_R_local; 4612 } else { 4613 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4614 } 4615 4616 /* print some info if requested */ 4617 if (pcbddc->dbg_flag) { 4618 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4619 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4620 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4623 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4624 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4625 } 4626 4627 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4628 if (!sub_schurs || !sub_schurs->reuse_solver) { 4629 IS is_aux1,is_aux2; 4630 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4631 4632 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4633 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4634 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4635 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4636 for (i=0; i<n_D; i++) { 4637 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4638 } 4639 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4640 for (i=0, j=0; i<n_R; i++) { 4641 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4642 aux_array1[j++] = i; 4643 } 4644 } 4645 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4646 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4647 for (i=0, j=0; i<n_B; i++) { 4648 if (!PetscBTLookup(bitmask,is_indices[i])) { 4649 aux_array2[j++] = i; 4650 } 4651 } 4652 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4653 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4654 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4655 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4656 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4657 4658 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4659 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4660 for (i=0, j=0; i<n_R; i++) { 4661 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4662 aux_array1[j++] = i; 4663 } 4664 } 4665 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4666 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4667 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4668 } 4669 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4670 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4671 } else { 4672 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4673 IS tis; 4674 PetscInt schur_size; 4675 4676 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4677 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4678 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4679 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4680 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4681 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4682 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4683 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4684 } 4685 } 4686 PetscFunctionReturn(0); 4687 } 4688 4689 4690 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4691 { 4692 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4693 PC_IS *pcis = (PC_IS*)pc->data; 4694 PC pc_temp; 4695 Mat A_RR; 4696 MatReuse reuse; 4697 PetscScalar m_one = -1.0; 4698 PetscReal value; 4699 PetscInt n_D,n_R; 4700 PetscBool check_corr[2],issbaij; 4701 PetscErrorCode ierr; 4702 /* prefixes stuff */ 4703 char dir_prefix[256],neu_prefix[256],str_level[16]; 4704 size_t len; 4705 4706 PetscFunctionBegin; 4707 4708 /* compute prefixes */ 4709 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4710 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4711 if (!pcbddc->current_level) { 4712 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4713 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4714 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4715 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4716 } else { 4717 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4718 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4719 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4720 len -= 15; /* remove "pc_bddc_coarse_" */ 4721 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4722 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4723 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4724 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4725 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4726 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4727 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4728 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4729 } 4730 4731 /* DIRICHLET PROBLEM */ 4732 if (dirichlet) { 4733 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4734 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4735 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4736 if (pcbddc->dbg_flag) { 4737 Mat A_IIn; 4738 4739 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4740 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4741 pcis->A_II = A_IIn; 4742 } 4743 } 4744 if (pcbddc->local_mat->symmetric_set) { 4745 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4746 } 4747 /* Matrix for Dirichlet problem is pcis->A_II */ 4748 n_D = pcis->n - pcis->n_B; 4749 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4750 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4751 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4752 /* default */ 4753 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4754 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4755 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4756 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4757 if (issbaij) { 4758 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4759 } else { 4760 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4761 } 4762 /* Allow user's customization */ 4763 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4764 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4765 } 4766 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4767 if (sub_schurs && sub_schurs->reuse_solver) { 4768 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4769 4770 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4771 } 4772 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4773 if (!n_D) { 4774 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4775 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4776 } 4777 /* Set Up KSP for Dirichlet problem of BDDC */ 4778 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4779 /* set ksp_D into pcis data */ 4780 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4781 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4782 pcis->ksp_D = pcbddc->ksp_D; 4783 } 4784 4785 /* NEUMANN PROBLEM */ 4786 A_RR = 0; 4787 if (neumann) { 4788 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4789 PetscInt ibs,mbs; 4790 PetscBool issbaij; 4791 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4792 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4793 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4794 if (pcbddc->ksp_R) { /* already created ksp */ 4795 PetscInt nn_R; 4796 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4797 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4798 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4799 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4800 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4801 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4802 reuse = MAT_INITIAL_MATRIX; 4803 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4804 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4805 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4806 reuse = MAT_INITIAL_MATRIX; 4807 } else { /* safe to reuse the matrix */ 4808 reuse = MAT_REUSE_MATRIX; 4809 } 4810 } 4811 /* last check */ 4812 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4813 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4814 reuse = MAT_INITIAL_MATRIX; 4815 } 4816 } else { /* first time, so we need to create the matrix */ 4817 reuse = MAT_INITIAL_MATRIX; 4818 } 4819 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4820 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4821 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4822 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4823 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4824 if (matis->A == pcbddc->local_mat) { 4825 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4826 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4827 } else { 4828 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4829 } 4830 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4831 if (matis->A == pcbddc->local_mat) { 4832 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4833 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4834 } else { 4835 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4836 } 4837 } 4838 /* extract A_RR */ 4839 if (sub_schurs && sub_schurs->reuse_solver) { 4840 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4841 4842 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4843 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4844 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4845 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4846 } else { 4847 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4848 } 4849 } else { 4850 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4851 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4852 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4853 } 4854 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4855 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4856 } 4857 if (pcbddc->local_mat->symmetric_set) { 4858 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4859 } 4860 if (!pcbddc->ksp_R) { /* create object if not present */ 4861 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4862 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4863 /* default */ 4864 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4865 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4866 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4867 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4868 if (issbaij) { 4869 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4870 } else { 4871 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4872 } 4873 /* Allow user's customization */ 4874 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4875 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4876 } 4877 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4878 if (!n_R) { 4879 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4880 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4881 } 4882 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4883 /* Reuse solver if it is present */ 4884 if (sub_schurs && sub_schurs->reuse_solver) { 4885 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4886 4887 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4888 } 4889 /* Set Up KSP for Neumann problem of BDDC */ 4890 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4891 } 4892 4893 if (pcbddc->dbg_flag) { 4894 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4895 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4896 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4897 } 4898 4899 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4900 check_corr[0] = check_corr[1] = PETSC_FALSE; 4901 if (pcbddc->NullSpace_corr[0]) { 4902 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4903 } 4904 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4905 check_corr[0] = PETSC_TRUE; 4906 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4907 } 4908 if (neumann && pcbddc->NullSpace_corr[2]) { 4909 check_corr[1] = PETSC_TRUE; 4910 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4911 } 4912 4913 /* check Dirichlet and Neumann solvers */ 4914 if (pcbddc->dbg_flag) { 4915 if (dirichlet) { /* Dirichlet */ 4916 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4917 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4918 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4919 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4920 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4921 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4922 if (check_corr[0]) { 4923 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4924 } 4925 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4926 } 4927 if (neumann) { /* Neumann */ 4928 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4929 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4930 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4931 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4932 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4933 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4934 if (check_corr[1]) { 4935 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4936 } 4937 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4938 } 4939 } 4940 /* free Neumann problem's matrix */ 4941 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4942 PetscFunctionReturn(0); 4943 } 4944 4945 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4946 { 4947 PetscErrorCode ierr; 4948 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4949 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4950 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4951 4952 PetscFunctionBegin; 4953 if (!reuse_solver) { 4954 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4955 } 4956 if (!pcbddc->switch_static) { 4957 if (applytranspose && pcbddc->local_auxmat1) { 4958 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4959 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4960 } 4961 if (!reuse_solver) { 4962 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4963 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4964 } else { 4965 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4966 4967 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4968 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4969 } 4970 } else { 4971 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4972 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4973 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4974 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4975 if (applytranspose && pcbddc->local_auxmat1) { 4976 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4977 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4978 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4979 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4980 } 4981 } 4982 if (!reuse_solver || pcbddc->switch_static) { 4983 if (applytranspose) { 4984 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4985 } else { 4986 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4987 } 4988 } else { 4989 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4990 4991 if (applytranspose) { 4992 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4993 } else { 4994 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4995 } 4996 } 4997 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4998 if (!pcbddc->switch_static) { 4999 if (!reuse_solver) { 5000 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5001 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5002 } else { 5003 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5004 5005 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5006 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5007 } 5008 if (!applytranspose && pcbddc->local_auxmat1) { 5009 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5010 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5011 } 5012 } else { 5013 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5014 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5015 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5016 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5017 if (!applytranspose && pcbddc->local_auxmat1) { 5018 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5019 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5020 } 5021 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5022 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5023 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5024 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5025 } 5026 PetscFunctionReturn(0); 5027 } 5028 5029 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5030 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5031 { 5032 PetscErrorCode ierr; 5033 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5034 PC_IS* pcis = (PC_IS*) (pc->data); 5035 const PetscScalar zero = 0.0; 5036 5037 PetscFunctionBegin; 5038 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5039 if (!pcbddc->benign_apply_coarse_only) { 5040 if (applytranspose) { 5041 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5042 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5043 } else { 5044 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5045 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5046 } 5047 } else { 5048 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5049 } 5050 5051 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5052 if (pcbddc->benign_n) { 5053 PetscScalar *array; 5054 PetscInt j; 5055 5056 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5057 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5058 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5059 } 5060 5061 /* start communications from local primal nodes to rhs of coarse solver */ 5062 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5063 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5064 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5065 5066 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5067 if (pcbddc->coarse_ksp) { 5068 Mat coarse_mat; 5069 Vec rhs,sol; 5070 MatNullSpace nullsp; 5071 PetscBool isbddc = PETSC_FALSE; 5072 5073 if (pcbddc->benign_have_null) { 5074 PC coarse_pc; 5075 5076 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5077 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5078 /* we need to propagate to coarser levels the need for a possible benign correction */ 5079 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5080 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5081 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5082 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5083 } 5084 } 5085 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5086 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5087 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5088 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5089 if (nullsp) { 5090 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5091 } 5092 if (applytranspose) { 5093 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5094 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5095 } else { 5096 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5097 PC coarse_pc; 5098 5099 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5100 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5101 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5102 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5103 } else { 5104 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5105 } 5106 } 5107 /* we don't need the benign correction at coarser levels anymore */ 5108 if (pcbddc->benign_have_null && isbddc) { 5109 PC coarse_pc; 5110 PC_BDDC* coarsepcbddc; 5111 5112 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5113 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5114 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5115 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5116 } 5117 if (nullsp) { 5118 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5119 } 5120 } 5121 5122 /* Local solution on R nodes */ 5123 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5124 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5125 } 5126 /* communications from coarse sol to local primal nodes */ 5127 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5128 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5129 5130 /* Sum contributions from the two levels */ 5131 if (!pcbddc->benign_apply_coarse_only) { 5132 if (applytranspose) { 5133 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5134 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5135 } else { 5136 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5137 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5138 } 5139 /* store p0 */ 5140 if (pcbddc->benign_n) { 5141 PetscScalar *array; 5142 PetscInt j; 5143 5144 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5145 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5146 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5147 } 5148 } else { /* expand the coarse solution */ 5149 if (applytranspose) { 5150 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5151 } else { 5152 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5153 } 5154 } 5155 PetscFunctionReturn(0); 5156 } 5157 5158 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5159 { 5160 PetscErrorCode ierr; 5161 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5162 PetscScalar *array; 5163 Vec from,to; 5164 5165 PetscFunctionBegin; 5166 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5167 from = pcbddc->coarse_vec; 5168 to = pcbddc->vec1_P; 5169 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5170 Vec tvec; 5171 5172 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5173 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5174 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5175 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5176 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5177 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5178 } 5179 } else { /* from local to global -> put data in coarse right hand side */ 5180 from = pcbddc->vec1_P; 5181 to = pcbddc->coarse_vec; 5182 } 5183 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5184 PetscFunctionReturn(0); 5185 } 5186 5187 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5188 { 5189 PetscErrorCode ierr; 5190 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5191 PetscScalar *array; 5192 Vec from,to; 5193 5194 PetscFunctionBegin; 5195 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5196 from = pcbddc->coarse_vec; 5197 to = pcbddc->vec1_P; 5198 } else { /* from local to global -> put data in coarse right hand side */ 5199 from = pcbddc->vec1_P; 5200 to = pcbddc->coarse_vec; 5201 } 5202 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5203 if (smode == SCATTER_FORWARD) { 5204 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5205 Vec tvec; 5206 5207 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5208 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5209 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5210 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5211 } 5212 } else { 5213 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5214 ierr = VecResetArray(from);CHKERRQ(ierr); 5215 } 5216 } 5217 PetscFunctionReturn(0); 5218 } 5219 5220 /* uncomment for testing purposes */ 5221 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5222 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5223 { 5224 PetscErrorCode ierr; 5225 PC_IS* pcis = (PC_IS*)(pc->data); 5226 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5227 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5228 /* one and zero */ 5229 PetscScalar one=1.0,zero=0.0; 5230 /* space to store constraints and their local indices */ 5231 PetscScalar *constraints_data; 5232 PetscInt *constraints_idxs,*constraints_idxs_B; 5233 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5234 PetscInt *constraints_n; 5235 /* iterators */ 5236 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5237 /* BLAS integers */ 5238 PetscBLASInt lwork,lierr; 5239 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5240 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5241 /* reuse */ 5242 PetscInt olocal_primal_size,olocal_primal_size_cc; 5243 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5244 /* change of basis */ 5245 PetscBool qr_needed; 5246 PetscBT change_basis,qr_needed_idx; 5247 /* auxiliary stuff */ 5248 PetscInt *nnz,*is_indices; 5249 PetscInt ncc; 5250 /* some quantities */ 5251 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5252 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5253 5254 PetscFunctionBegin; 5255 /* Destroy Mat objects computed previously */ 5256 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5257 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5258 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5259 /* save info on constraints from previous setup (if any) */ 5260 olocal_primal_size = pcbddc->local_primal_size; 5261 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5262 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5263 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5264 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5265 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5266 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5267 5268 if (!pcbddc->adaptive_selection) { 5269 IS ISForVertices,*ISForFaces,*ISForEdges; 5270 MatNullSpace nearnullsp; 5271 const Vec *nearnullvecs; 5272 Vec *localnearnullsp; 5273 PetscScalar *array; 5274 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5275 PetscBool nnsp_has_cnst; 5276 /* LAPACK working arrays for SVD or POD */ 5277 PetscBool skip_lapack,boolforchange; 5278 PetscScalar *work; 5279 PetscReal *singular_vals; 5280 #if defined(PETSC_USE_COMPLEX) 5281 PetscReal *rwork; 5282 #endif 5283 #if defined(PETSC_MISSING_LAPACK_GESVD) 5284 PetscScalar *temp_basis,*correlation_mat; 5285 #else 5286 PetscBLASInt dummy_int=1; 5287 PetscScalar dummy_scalar=1.; 5288 #endif 5289 5290 /* Get index sets for faces, edges and vertices from graph */ 5291 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5292 /* print some info */ 5293 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5294 PetscInt nv; 5295 5296 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5297 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5298 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5299 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5300 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5301 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5302 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5303 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5304 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5305 } 5306 5307 /* free unneeded index sets */ 5308 if (!pcbddc->use_vertices) { 5309 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5310 } 5311 if (!pcbddc->use_edges) { 5312 for (i=0;i<n_ISForEdges;i++) { 5313 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5314 } 5315 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5316 n_ISForEdges = 0; 5317 } 5318 if (!pcbddc->use_faces) { 5319 for (i=0;i<n_ISForFaces;i++) { 5320 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5321 } 5322 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5323 n_ISForFaces = 0; 5324 } 5325 5326 /* check if near null space is attached to global mat */ 5327 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5328 if (nearnullsp) { 5329 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5330 /* remove any stored info */ 5331 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5332 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5333 /* store information for BDDC solver reuse */ 5334 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5335 pcbddc->onearnullspace = nearnullsp; 5336 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5337 for (i=0;i<nnsp_size;i++) { 5338 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5339 } 5340 } else { /* if near null space is not provided BDDC uses constants by default */ 5341 nnsp_size = 0; 5342 nnsp_has_cnst = PETSC_TRUE; 5343 } 5344 /* get max number of constraints on a single cc */ 5345 max_constraints = nnsp_size; 5346 if (nnsp_has_cnst) max_constraints++; 5347 5348 /* 5349 Evaluate maximum storage size needed by the procedure 5350 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5351 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5352 There can be multiple constraints per connected component 5353 */ 5354 n_vertices = 0; 5355 if (ISForVertices) { 5356 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5357 } 5358 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5359 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5360 5361 total_counts = n_ISForFaces+n_ISForEdges; 5362 total_counts *= max_constraints; 5363 total_counts += n_vertices; 5364 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5365 5366 total_counts = 0; 5367 max_size_of_constraint = 0; 5368 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5369 IS used_is; 5370 if (i<n_ISForEdges) { 5371 used_is = ISForEdges[i]; 5372 } else { 5373 used_is = ISForFaces[i-n_ISForEdges]; 5374 } 5375 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5376 total_counts += j; 5377 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5378 } 5379 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5380 5381 /* get local part of global near null space vectors */ 5382 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5383 for (k=0;k<nnsp_size;k++) { 5384 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5385 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5386 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5387 } 5388 5389 /* whether or not to skip lapack calls */ 5390 skip_lapack = PETSC_TRUE; 5391 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5392 5393 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5394 if (!skip_lapack) { 5395 PetscScalar temp_work; 5396 5397 #if defined(PETSC_MISSING_LAPACK_GESVD) 5398 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5399 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5400 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5401 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5402 #if defined(PETSC_USE_COMPLEX) 5403 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5404 #endif 5405 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5406 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5407 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5408 lwork = -1; 5409 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5410 #if !defined(PETSC_USE_COMPLEX) 5411 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5412 #else 5413 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5414 #endif 5415 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5416 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5417 #else /* on missing GESVD */ 5418 /* SVD */ 5419 PetscInt max_n,min_n; 5420 max_n = max_size_of_constraint; 5421 min_n = max_constraints; 5422 if (max_size_of_constraint < max_constraints) { 5423 min_n = max_size_of_constraint; 5424 max_n = max_constraints; 5425 } 5426 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5427 #if defined(PETSC_USE_COMPLEX) 5428 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5429 #endif 5430 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5431 lwork = -1; 5432 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5433 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5434 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5435 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5436 #if !defined(PETSC_USE_COMPLEX) 5437 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5438 #else 5439 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5440 #endif 5441 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5442 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5443 #endif /* on missing GESVD */ 5444 /* Allocate optimal workspace */ 5445 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5446 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5447 } 5448 /* Now we can loop on constraining sets */ 5449 total_counts = 0; 5450 constraints_idxs_ptr[0] = 0; 5451 constraints_data_ptr[0] = 0; 5452 /* vertices */ 5453 if (n_vertices) { 5454 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5455 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5456 for (i=0;i<n_vertices;i++) { 5457 constraints_n[total_counts] = 1; 5458 constraints_data[total_counts] = 1.0; 5459 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5460 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5461 total_counts++; 5462 } 5463 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5464 n_vertices = total_counts; 5465 } 5466 5467 /* edges and faces */ 5468 total_counts_cc = total_counts; 5469 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5470 IS used_is; 5471 PetscBool idxs_copied = PETSC_FALSE; 5472 5473 if (ncc<n_ISForEdges) { 5474 used_is = ISForEdges[ncc]; 5475 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5476 } else { 5477 used_is = ISForFaces[ncc-n_ISForEdges]; 5478 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5479 } 5480 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5481 5482 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5483 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5484 /* change of basis should not be performed on local periodic nodes */ 5485 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5486 if (nnsp_has_cnst) { 5487 PetscScalar quad_value; 5488 5489 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5490 idxs_copied = PETSC_TRUE; 5491 5492 if (!pcbddc->use_nnsp_true) { 5493 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5494 } else { 5495 quad_value = 1.0; 5496 } 5497 for (j=0;j<size_of_constraint;j++) { 5498 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5499 } 5500 temp_constraints++; 5501 total_counts++; 5502 } 5503 for (k=0;k<nnsp_size;k++) { 5504 PetscReal real_value; 5505 PetscScalar *ptr_to_data; 5506 5507 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5508 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5509 for (j=0;j<size_of_constraint;j++) { 5510 ptr_to_data[j] = array[is_indices[j]]; 5511 } 5512 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5513 /* check if array is null on the connected component */ 5514 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5515 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5516 if (real_value > 0.0) { /* keep indices and values */ 5517 temp_constraints++; 5518 total_counts++; 5519 if (!idxs_copied) { 5520 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5521 idxs_copied = PETSC_TRUE; 5522 } 5523 } 5524 } 5525 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5526 valid_constraints = temp_constraints; 5527 if (!pcbddc->use_nnsp_true && temp_constraints) { 5528 if (temp_constraints == 1) { /* just normalize the constraint */ 5529 PetscScalar norm,*ptr_to_data; 5530 5531 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5532 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5533 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5534 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5535 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5536 } else { /* perform SVD */ 5537 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5538 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5539 5540 #if defined(PETSC_MISSING_LAPACK_GESVD) 5541 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5542 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5543 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5544 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5545 from that computed using LAPACKgesvd 5546 -> This is due to a different computation of eigenvectors in LAPACKheev 5547 -> The quality of the POD-computed basis will be the same */ 5548 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5549 /* Store upper triangular part of correlation matrix */ 5550 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5551 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5552 for (j=0;j<temp_constraints;j++) { 5553 for (k=0;k<j+1;k++) { 5554 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5555 } 5556 } 5557 /* compute eigenvalues and eigenvectors of correlation matrix */ 5558 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5559 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5560 #if !defined(PETSC_USE_COMPLEX) 5561 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5562 #else 5563 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5564 #endif 5565 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5566 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5567 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5568 j = 0; 5569 while (j < temp_constraints && singular_vals[j] < tol) j++; 5570 total_counts = total_counts-j; 5571 valid_constraints = temp_constraints-j; 5572 /* scale and copy POD basis into used quadrature memory */ 5573 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5574 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5575 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5576 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5577 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5578 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5579 if (j<temp_constraints) { 5580 PetscInt ii; 5581 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5582 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5583 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5584 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5585 for (k=0;k<temp_constraints-j;k++) { 5586 for (ii=0;ii<size_of_constraint;ii++) { 5587 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5588 } 5589 } 5590 } 5591 #else /* on missing GESVD */ 5592 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5593 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5594 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5595 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5596 #if !defined(PETSC_USE_COMPLEX) 5597 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5598 #else 5599 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5600 #endif 5601 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5602 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5603 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5604 k = temp_constraints; 5605 if (k > size_of_constraint) k = size_of_constraint; 5606 j = 0; 5607 while (j < k && singular_vals[k-j-1] < tol) j++; 5608 valid_constraints = k-j; 5609 total_counts = total_counts-temp_constraints+valid_constraints; 5610 #endif /* on missing GESVD */ 5611 } 5612 } 5613 /* update pointers information */ 5614 if (valid_constraints) { 5615 constraints_n[total_counts_cc] = valid_constraints; 5616 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5617 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5618 /* set change_of_basis flag */ 5619 if (boolforchange) { 5620 PetscBTSet(change_basis,total_counts_cc); 5621 } 5622 total_counts_cc++; 5623 } 5624 } 5625 /* free workspace */ 5626 if (!skip_lapack) { 5627 ierr = PetscFree(work);CHKERRQ(ierr); 5628 #if defined(PETSC_USE_COMPLEX) 5629 ierr = PetscFree(rwork);CHKERRQ(ierr); 5630 #endif 5631 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5632 #if defined(PETSC_MISSING_LAPACK_GESVD) 5633 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5634 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5635 #endif 5636 } 5637 for (k=0;k<nnsp_size;k++) { 5638 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5639 } 5640 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5641 /* free index sets of faces, edges and vertices */ 5642 for (i=0;i<n_ISForFaces;i++) { 5643 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5644 } 5645 if (n_ISForFaces) { 5646 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5647 } 5648 for (i=0;i<n_ISForEdges;i++) { 5649 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5650 } 5651 if (n_ISForEdges) { 5652 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5653 } 5654 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5655 } else { 5656 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5657 5658 total_counts = 0; 5659 n_vertices = 0; 5660 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5661 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5662 } 5663 max_constraints = 0; 5664 total_counts_cc = 0; 5665 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5666 total_counts += pcbddc->adaptive_constraints_n[i]; 5667 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5668 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5669 } 5670 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5671 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5672 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5673 constraints_data = pcbddc->adaptive_constraints_data; 5674 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5675 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5676 total_counts_cc = 0; 5677 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5678 if (pcbddc->adaptive_constraints_n[i]) { 5679 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5680 } 5681 } 5682 #if 0 5683 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5684 for (i=0;i<total_counts_cc;i++) { 5685 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5686 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5687 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5688 printf(" %d",constraints_idxs[j]); 5689 } 5690 printf("\n"); 5691 printf("number of cc: %d\n",constraints_n[i]); 5692 } 5693 for (i=0;i<n_vertices;i++) { 5694 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5695 } 5696 for (i=0;i<sub_schurs->n_subs;i++) { 5697 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5698 } 5699 #endif 5700 5701 max_size_of_constraint = 0; 5702 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5703 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5704 /* Change of basis */ 5705 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5706 if (pcbddc->use_change_of_basis) { 5707 for (i=0;i<sub_schurs->n_subs;i++) { 5708 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5709 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5710 } 5711 } 5712 } 5713 } 5714 pcbddc->local_primal_size = total_counts; 5715 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5716 5717 /* map constraints_idxs in boundary numbering */ 5718 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5719 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5720 5721 /* Create constraint matrix */ 5722 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5723 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5724 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5725 5726 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5727 /* determine if a QR strategy is needed for change of basis */ 5728 qr_needed = PETSC_FALSE; 5729 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5730 total_primal_vertices=0; 5731 pcbddc->local_primal_size_cc = 0; 5732 for (i=0;i<total_counts_cc;i++) { 5733 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5734 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5735 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5736 pcbddc->local_primal_size_cc += 1; 5737 } else if (PetscBTLookup(change_basis,i)) { 5738 for (k=0;k<constraints_n[i];k++) { 5739 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5740 } 5741 pcbddc->local_primal_size_cc += constraints_n[i]; 5742 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5743 PetscBTSet(qr_needed_idx,i); 5744 qr_needed = PETSC_TRUE; 5745 } 5746 } else { 5747 pcbddc->local_primal_size_cc += 1; 5748 } 5749 } 5750 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5751 pcbddc->n_vertices = total_primal_vertices; 5752 /* permute indices in order to have a sorted set of vertices */ 5753 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5754 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5755 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5756 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5757 5758 /* nonzero structure of constraint matrix */ 5759 /* and get reference dof for local constraints */ 5760 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5761 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5762 5763 j = total_primal_vertices; 5764 total_counts = total_primal_vertices; 5765 cum = total_primal_vertices; 5766 for (i=n_vertices;i<total_counts_cc;i++) { 5767 if (!PetscBTLookup(change_basis,i)) { 5768 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5769 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5770 cum++; 5771 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5772 for (k=0;k<constraints_n[i];k++) { 5773 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5774 nnz[j+k] = size_of_constraint; 5775 } 5776 j += constraints_n[i]; 5777 } 5778 } 5779 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5780 ierr = PetscFree(nnz);CHKERRQ(ierr); 5781 5782 /* set values in constraint matrix */ 5783 for (i=0;i<total_primal_vertices;i++) { 5784 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5785 } 5786 total_counts = total_primal_vertices; 5787 for (i=n_vertices;i<total_counts_cc;i++) { 5788 if (!PetscBTLookup(change_basis,i)) { 5789 PetscInt *cols; 5790 5791 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5792 cols = constraints_idxs+constraints_idxs_ptr[i]; 5793 for (k=0;k<constraints_n[i];k++) { 5794 PetscInt row = total_counts+k; 5795 PetscScalar *vals; 5796 5797 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5798 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5799 } 5800 total_counts += constraints_n[i]; 5801 } 5802 } 5803 /* assembling */ 5804 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5805 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5806 5807 /* 5808 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5809 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5810 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5811 */ 5812 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5813 if (pcbddc->use_change_of_basis) { 5814 /* dual and primal dofs on a single cc */ 5815 PetscInt dual_dofs,primal_dofs; 5816 /* working stuff for GEQRF */ 5817 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5818 PetscBLASInt lqr_work; 5819 /* working stuff for UNGQR */ 5820 PetscScalar *gqr_work,lgqr_work_t; 5821 PetscBLASInt lgqr_work; 5822 /* working stuff for TRTRS */ 5823 PetscScalar *trs_rhs; 5824 PetscBLASInt Blas_NRHS; 5825 /* pointers for values insertion into change of basis matrix */ 5826 PetscInt *start_rows,*start_cols; 5827 PetscScalar *start_vals; 5828 /* working stuff for values insertion */ 5829 PetscBT is_primal; 5830 PetscInt *aux_primal_numbering_B; 5831 /* matrix sizes */ 5832 PetscInt global_size,local_size; 5833 /* temporary change of basis */ 5834 Mat localChangeOfBasisMatrix; 5835 /* extra space for debugging */ 5836 PetscScalar *dbg_work; 5837 5838 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5839 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5840 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5841 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5842 /* nonzeros for local mat */ 5843 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5844 if (!pcbddc->benign_change || pcbddc->fake_change) { 5845 for (i=0;i<pcis->n;i++) nnz[i]=1; 5846 } else { 5847 const PetscInt *ii; 5848 PetscInt n; 5849 PetscBool flg_row; 5850 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5851 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5852 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5853 } 5854 for (i=n_vertices;i<total_counts_cc;i++) { 5855 if (PetscBTLookup(change_basis,i)) { 5856 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5857 if (PetscBTLookup(qr_needed_idx,i)) { 5858 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5859 } else { 5860 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5861 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5862 } 5863 } 5864 } 5865 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5866 ierr = PetscFree(nnz);CHKERRQ(ierr); 5867 /* Set interior change in the matrix */ 5868 if (!pcbddc->benign_change || pcbddc->fake_change) { 5869 for (i=0;i<pcis->n;i++) { 5870 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5871 } 5872 } else { 5873 const PetscInt *ii,*jj; 5874 PetscScalar *aa; 5875 PetscInt n; 5876 PetscBool flg_row; 5877 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5878 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5879 for (i=0;i<n;i++) { 5880 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5881 } 5882 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5883 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5884 } 5885 5886 if (pcbddc->dbg_flag) { 5887 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5888 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5889 } 5890 5891 5892 /* Now we loop on the constraints which need a change of basis */ 5893 /* 5894 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5895 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5896 5897 Basic blocks of change of basis matrix T computed by 5898 5899 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5900 5901 | 1 0 ... 0 s_1/S | 5902 | 0 1 ... 0 s_2/S | 5903 | ... | 5904 | 0 ... 1 s_{n-1}/S | 5905 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5906 5907 with S = \sum_{i=1}^n s_i^2 5908 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5909 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5910 5911 - QR decomposition of constraints otherwise 5912 */ 5913 if (qr_needed) { 5914 /* space to store Q */ 5915 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5916 /* array to store scaling factors for reflectors */ 5917 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5918 /* first we issue queries for optimal work */ 5919 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5920 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5921 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5922 lqr_work = -1; 5923 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5924 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5925 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5926 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5927 lgqr_work = -1; 5928 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5929 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5930 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5931 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5932 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5933 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5934 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5935 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5936 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5937 /* array to store rhs and solution of triangular solver */ 5938 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5939 /* allocating workspace for check */ 5940 if (pcbddc->dbg_flag) { 5941 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5942 } 5943 } 5944 /* array to store whether a node is primal or not */ 5945 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5946 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5947 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5948 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5949 for (i=0;i<total_primal_vertices;i++) { 5950 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5951 } 5952 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5953 5954 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5955 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5956 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5957 if (PetscBTLookup(change_basis,total_counts)) { 5958 /* get constraint info */ 5959 primal_dofs = constraints_n[total_counts]; 5960 dual_dofs = size_of_constraint-primal_dofs; 5961 5962 if (pcbddc->dbg_flag) { 5963 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5964 } 5965 5966 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5967 5968 /* copy quadrature constraints for change of basis check */ 5969 if (pcbddc->dbg_flag) { 5970 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5971 } 5972 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5973 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5974 5975 /* compute QR decomposition of constraints */ 5976 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5977 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5978 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5979 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5980 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5981 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5982 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5983 5984 /* explictly compute R^-T */ 5985 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5986 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5987 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5988 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5989 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5990 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5991 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5992 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5993 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5994 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5995 5996 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5997 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5998 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5999 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6000 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6001 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6002 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6003 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 6004 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6005 6006 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6007 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6008 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6009 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6010 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6011 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6012 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6013 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6014 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6015 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6016 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6017 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6018 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6019 6020 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6021 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6022 /* insert cols for primal dofs */ 6023 for (j=0;j<primal_dofs;j++) { 6024 start_vals = &qr_basis[j*size_of_constraint]; 6025 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6026 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6027 } 6028 /* insert cols for dual dofs */ 6029 for (j=0,k=0;j<dual_dofs;k++) { 6030 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6031 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6032 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6033 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6034 j++; 6035 } 6036 } 6037 6038 /* check change of basis */ 6039 if (pcbddc->dbg_flag) { 6040 PetscInt ii,jj; 6041 PetscBool valid_qr=PETSC_TRUE; 6042 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6043 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6044 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6045 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6046 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6047 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6048 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6049 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6050 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6051 for (jj=0;jj<size_of_constraint;jj++) { 6052 for (ii=0;ii<primal_dofs;ii++) { 6053 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6054 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 6055 } 6056 } 6057 if (!valid_qr) { 6058 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6059 for (jj=0;jj<size_of_constraint;jj++) { 6060 for (ii=0;ii<primal_dofs;ii++) { 6061 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6062 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6063 } 6064 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 6065 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6066 } 6067 } 6068 } 6069 } else { 6070 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6071 } 6072 } 6073 } else { /* simple transformation block */ 6074 PetscInt row,col; 6075 PetscScalar val,norm; 6076 6077 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6078 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6079 for (j=0;j<size_of_constraint;j++) { 6080 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6081 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6082 if (!PetscBTLookup(is_primal,row_B)) { 6083 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6084 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6085 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6086 } else { 6087 for (k=0;k<size_of_constraint;k++) { 6088 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6089 if (row != col) { 6090 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6091 } else { 6092 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6093 } 6094 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6095 } 6096 } 6097 } 6098 if (pcbddc->dbg_flag) { 6099 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6100 } 6101 } 6102 } else { 6103 if (pcbddc->dbg_flag) { 6104 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6105 } 6106 } 6107 } 6108 6109 /* free workspace */ 6110 if (qr_needed) { 6111 if (pcbddc->dbg_flag) { 6112 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6113 } 6114 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6115 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6116 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6117 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6118 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6119 } 6120 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6121 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6122 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6123 6124 /* assembling of global change of variable */ 6125 if (!pcbddc->fake_change) { 6126 Mat tmat; 6127 PetscInt bs; 6128 6129 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6130 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6131 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6132 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6133 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6134 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6135 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6136 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6137 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6138 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6139 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6140 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6141 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6142 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6143 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6144 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6145 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6146 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6147 6148 /* check */ 6149 if (pcbddc->dbg_flag) { 6150 PetscReal error; 6151 Vec x,x_change; 6152 6153 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6154 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6155 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6156 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6157 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6158 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6159 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6160 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6161 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6162 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6163 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6164 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6165 if (error > PETSC_SMALL) { 6166 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6167 } 6168 ierr = VecDestroy(&x);CHKERRQ(ierr); 6169 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6170 } 6171 /* adapt sub_schurs computed (if any) */ 6172 if (pcbddc->use_deluxe_scaling) { 6173 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6174 6175 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 6176 if (sub_schurs && sub_schurs->S_Ej_all) { 6177 Mat S_new,tmat; 6178 IS is_all_N,is_V_Sall = NULL; 6179 6180 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6181 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6182 if (pcbddc->deluxe_zerorows) { 6183 ISLocalToGlobalMapping NtoSall; 6184 IS is_V; 6185 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6186 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6187 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6188 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6189 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6190 } 6191 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6192 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6193 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6194 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6195 if (pcbddc->deluxe_zerorows) { 6196 const PetscScalar *array; 6197 const PetscInt *idxs_V,*idxs_all; 6198 PetscInt i,n_V; 6199 6200 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6201 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6202 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6203 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6204 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6205 for (i=0;i<n_V;i++) { 6206 PetscScalar val; 6207 PetscInt idx; 6208 6209 idx = idxs_V[i]; 6210 val = array[idxs_all[idxs_V[i]]]; 6211 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6212 } 6213 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6214 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6215 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6216 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6217 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6218 } 6219 sub_schurs->S_Ej_all = S_new; 6220 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6221 if (sub_schurs->sum_S_Ej_all) { 6222 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6223 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6224 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6225 if (pcbddc->deluxe_zerorows) { 6226 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6227 } 6228 sub_schurs->sum_S_Ej_all = S_new; 6229 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6230 } 6231 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6232 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6233 } 6234 /* destroy any change of basis context in sub_schurs */ 6235 if (sub_schurs && sub_schurs->change) { 6236 PetscInt i; 6237 6238 for (i=0;i<sub_schurs->n_subs;i++) { 6239 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6240 } 6241 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6242 } 6243 } 6244 if (pcbddc->switch_static) { /* need to save the local change */ 6245 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6246 } else { 6247 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6248 } 6249 /* determine if any process has changed the pressures locally */ 6250 pcbddc->change_interior = pcbddc->benign_have_null; 6251 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6252 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6253 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6254 pcbddc->use_qr_single = qr_needed; 6255 } 6256 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6257 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6258 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6259 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6260 } else { 6261 Mat benign_global = NULL; 6262 if (pcbddc->benign_have_null) { 6263 Mat tmat; 6264 6265 pcbddc->change_interior = PETSC_TRUE; 6266 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6267 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6268 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6269 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6270 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6271 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6272 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6273 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6274 if (pcbddc->benign_change) { 6275 Mat M; 6276 6277 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6278 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6279 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6280 ierr = MatDestroy(&M);CHKERRQ(ierr); 6281 } else { 6282 Mat eye; 6283 PetscScalar *array; 6284 6285 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6286 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6287 for (i=0;i<pcis->n;i++) { 6288 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6289 } 6290 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6291 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6292 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6293 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6294 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6295 } 6296 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6297 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6298 } 6299 if (pcbddc->user_ChangeOfBasisMatrix) { 6300 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6301 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6302 } else if (pcbddc->benign_have_null) { 6303 pcbddc->ChangeOfBasisMatrix = benign_global; 6304 } 6305 } 6306 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6307 IS is_global; 6308 const PetscInt *gidxs; 6309 6310 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6311 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6312 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6313 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6314 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6315 } 6316 } 6317 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6318 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6319 } 6320 6321 if (!pcbddc->fake_change) { 6322 /* add pressure dofs to set of primal nodes for numbering purposes */ 6323 for (i=0;i<pcbddc->benign_n;i++) { 6324 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6325 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6326 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6327 pcbddc->local_primal_size_cc++; 6328 pcbddc->local_primal_size++; 6329 } 6330 6331 /* check if a new primal space has been introduced (also take into account benign trick) */ 6332 pcbddc->new_primal_space_local = PETSC_TRUE; 6333 if (olocal_primal_size == pcbddc->local_primal_size) { 6334 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6335 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6336 if (!pcbddc->new_primal_space_local) { 6337 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6338 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6339 } 6340 } 6341 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6342 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6343 } 6344 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6345 6346 /* flush dbg viewer */ 6347 if (pcbddc->dbg_flag) { 6348 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6349 } 6350 6351 /* free workspace */ 6352 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6353 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6354 if (!pcbddc->adaptive_selection) { 6355 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6356 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6357 } else { 6358 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6359 pcbddc->adaptive_constraints_idxs_ptr, 6360 pcbddc->adaptive_constraints_data_ptr, 6361 pcbddc->adaptive_constraints_idxs, 6362 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6363 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6364 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6365 } 6366 PetscFunctionReturn(0); 6367 } 6368 6369 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6370 { 6371 ISLocalToGlobalMapping map; 6372 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6373 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6374 PetscInt i,N; 6375 PetscBool rcsr = PETSC_FALSE; 6376 PetscErrorCode ierr; 6377 6378 PetscFunctionBegin; 6379 if (pcbddc->recompute_topography) { 6380 pcbddc->graphanalyzed = PETSC_FALSE; 6381 /* Reset previously computed graph */ 6382 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6383 /* Init local Graph struct */ 6384 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6385 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6386 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6387 6388 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6389 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6390 } 6391 /* Check validity of the csr graph passed in by the user */ 6392 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6393 6394 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6395 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6396 PetscInt *xadj,*adjncy; 6397 PetscInt nvtxs; 6398 PetscBool flg_row=PETSC_FALSE; 6399 6400 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6401 if (flg_row) { 6402 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6403 pcbddc->computed_rowadj = PETSC_TRUE; 6404 } 6405 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6406 rcsr = PETSC_TRUE; 6407 } 6408 if (pcbddc->dbg_flag) { 6409 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6410 } 6411 6412 /* Setup of Graph */ 6413 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6414 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6415 6416 /* attach info on disconnected subdomains if present */ 6417 if (pcbddc->n_local_subs) { 6418 PetscInt *local_subs; 6419 6420 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6421 for (i=0;i<pcbddc->n_local_subs;i++) { 6422 const PetscInt *idxs; 6423 PetscInt nl,j; 6424 6425 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6426 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6427 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6428 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6429 } 6430 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6431 pcbddc->mat_graph->local_subs = local_subs; 6432 } 6433 } 6434 6435 if (!pcbddc->graphanalyzed) { 6436 /* Graph's connected components analysis */ 6437 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6438 pcbddc->graphanalyzed = PETSC_TRUE; 6439 } 6440 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6441 PetscFunctionReturn(0); 6442 } 6443 6444 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6445 { 6446 PetscInt i,j; 6447 PetscScalar *alphas; 6448 PetscErrorCode ierr; 6449 6450 PetscFunctionBegin; 6451 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6452 for (i=0;i<n;i++) { 6453 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6454 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6455 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6456 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6457 } 6458 ierr = PetscFree(alphas);CHKERRQ(ierr); 6459 PetscFunctionReturn(0); 6460 } 6461 6462 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6463 { 6464 Mat A; 6465 PetscInt n_neighs,*neighs,*n_shared,**shared; 6466 PetscMPIInt size,rank,color; 6467 PetscInt *xadj,*adjncy; 6468 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6469 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6470 PetscInt void_procs,*procs_candidates = NULL; 6471 PetscInt xadj_count,*count; 6472 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6473 PetscSubcomm psubcomm; 6474 MPI_Comm subcomm; 6475 PetscErrorCode ierr; 6476 6477 PetscFunctionBegin; 6478 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6479 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6480 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6481 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6482 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6483 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6484 6485 if (have_void) *have_void = PETSC_FALSE; 6486 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6487 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6488 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6489 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6490 im_active = !!n; 6491 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6492 void_procs = size - active_procs; 6493 /* get ranks of of non-active processes in mat communicator */ 6494 if (void_procs) { 6495 PetscInt ncand; 6496 6497 if (have_void) *have_void = PETSC_TRUE; 6498 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6499 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6500 for (i=0,ncand=0;i<size;i++) { 6501 if (!procs_candidates[i]) { 6502 procs_candidates[ncand++] = i; 6503 } 6504 } 6505 /* force n_subdomains to be not greater that the number of non-active processes */ 6506 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6507 } 6508 6509 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6510 number of subdomains requested 1 -> send to master or first candidate in voids */ 6511 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6512 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6513 PetscInt issize,isidx,dest; 6514 if (*n_subdomains == 1) dest = 0; 6515 else dest = rank; 6516 if (im_active) { 6517 issize = 1; 6518 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6519 isidx = procs_candidates[dest]; 6520 } else { 6521 isidx = dest; 6522 } 6523 } else { 6524 issize = 0; 6525 isidx = -1; 6526 } 6527 if (*n_subdomains != 1) *n_subdomains = active_procs; 6528 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6529 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6530 PetscFunctionReturn(0); 6531 } 6532 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6533 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6534 threshold = PetscMax(threshold,2); 6535 6536 /* Get info on mapping */ 6537 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6538 6539 /* build local CSR graph of subdomains' connectivity */ 6540 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6541 xadj[0] = 0; 6542 xadj[1] = PetscMax(n_neighs-1,0); 6543 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6544 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6545 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6546 for (i=1;i<n_neighs;i++) 6547 for (j=0;j<n_shared[i];j++) 6548 count[shared[i][j]] += 1; 6549 6550 xadj_count = 0; 6551 for (i=1;i<n_neighs;i++) { 6552 for (j=0;j<n_shared[i];j++) { 6553 if (count[shared[i][j]] < threshold) { 6554 adjncy[xadj_count] = neighs[i]; 6555 adjncy_wgt[xadj_count] = n_shared[i]; 6556 xadj_count++; 6557 break; 6558 } 6559 } 6560 } 6561 xadj[1] = xadj_count; 6562 ierr = PetscFree(count);CHKERRQ(ierr); 6563 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6564 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6565 6566 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6567 6568 /* Restrict work on active processes only */ 6569 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6570 if (void_procs) { 6571 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6572 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6573 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6574 subcomm = PetscSubcommChild(psubcomm); 6575 } else { 6576 psubcomm = NULL; 6577 subcomm = PetscObjectComm((PetscObject)mat); 6578 } 6579 6580 v_wgt = NULL; 6581 if (!color) { 6582 ierr = PetscFree(xadj);CHKERRQ(ierr); 6583 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6584 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6585 } else { 6586 Mat subdomain_adj; 6587 IS new_ranks,new_ranks_contig; 6588 MatPartitioning partitioner; 6589 PetscInt rstart=0,rend=0; 6590 PetscInt *is_indices,*oldranks; 6591 PetscMPIInt size; 6592 PetscBool aggregate; 6593 6594 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6595 if (void_procs) { 6596 PetscInt prank = rank; 6597 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6598 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6599 for (i=0;i<xadj[1];i++) { 6600 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6601 } 6602 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6603 } else { 6604 oldranks = NULL; 6605 } 6606 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6607 if (aggregate) { /* TODO: all this part could be made more efficient */ 6608 PetscInt lrows,row,ncols,*cols; 6609 PetscMPIInt nrank; 6610 PetscScalar *vals; 6611 6612 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6613 lrows = 0; 6614 if (nrank<redprocs) { 6615 lrows = size/redprocs; 6616 if (nrank<size%redprocs) lrows++; 6617 } 6618 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6619 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6620 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6621 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6622 row = nrank; 6623 ncols = xadj[1]-xadj[0]; 6624 cols = adjncy; 6625 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6626 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6627 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6628 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6629 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6630 ierr = PetscFree(xadj);CHKERRQ(ierr); 6631 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6632 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6633 ierr = PetscFree(vals);CHKERRQ(ierr); 6634 if (use_vwgt) { 6635 Vec v; 6636 const PetscScalar *array; 6637 PetscInt nl; 6638 6639 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6640 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6641 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6642 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6643 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6644 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6645 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6646 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6647 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6648 ierr = VecDestroy(&v);CHKERRQ(ierr); 6649 } 6650 } else { 6651 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6652 if (use_vwgt) { 6653 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6654 v_wgt[0] = n; 6655 } 6656 } 6657 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6658 6659 /* Partition */ 6660 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6661 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6662 if (v_wgt) { 6663 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6664 } 6665 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6666 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6667 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6668 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6669 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6670 6671 /* renumber new_ranks to avoid "holes" in new set of processors */ 6672 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6673 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6674 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6675 if (!aggregate) { 6676 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6677 #if defined(PETSC_USE_DEBUG) 6678 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6679 #endif 6680 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6681 } else if (oldranks) { 6682 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6683 } else { 6684 ranks_send_to_idx[0] = is_indices[0]; 6685 } 6686 } else { 6687 PetscInt idxs[1]; 6688 PetscMPIInt tag; 6689 MPI_Request *reqs; 6690 6691 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6692 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6693 for (i=rstart;i<rend;i++) { 6694 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6695 } 6696 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6697 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6698 ierr = PetscFree(reqs);CHKERRQ(ierr); 6699 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6700 #if defined(PETSC_USE_DEBUG) 6701 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6702 #endif 6703 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6704 } else if (oldranks) { 6705 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6706 } else { 6707 ranks_send_to_idx[0] = idxs[0]; 6708 } 6709 } 6710 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6711 /* clean up */ 6712 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6713 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6714 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6715 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6716 } 6717 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6718 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6719 6720 /* assemble parallel IS for sends */ 6721 i = 1; 6722 if (!color) i=0; 6723 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6724 PetscFunctionReturn(0); 6725 } 6726 6727 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6728 6729 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6730 { 6731 Mat local_mat; 6732 IS is_sends_internal; 6733 PetscInt rows,cols,new_local_rows; 6734 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6735 PetscBool ismatis,isdense,newisdense,destroy_mat; 6736 ISLocalToGlobalMapping l2gmap; 6737 PetscInt* l2gmap_indices; 6738 const PetscInt* is_indices; 6739 MatType new_local_type; 6740 /* buffers */ 6741 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6742 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6743 PetscInt *recv_buffer_idxs_local; 6744 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6745 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6746 /* MPI */ 6747 MPI_Comm comm,comm_n; 6748 PetscSubcomm subcomm; 6749 PetscMPIInt n_sends,n_recvs,commsize; 6750 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6751 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6752 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6753 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6754 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6755 PetscErrorCode ierr; 6756 6757 PetscFunctionBegin; 6758 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6759 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6760 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6761 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6762 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6763 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6764 PetscValidLogicalCollectiveBool(mat,reuse,6); 6765 PetscValidLogicalCollectiveInt(mat,nis,8); 6766 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6767 if (nvecs) { 6768 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6769 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6770 } 6771 /* further checks */ 6772 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6773 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6774 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6775 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6776 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6777 if (reuse && *mat_n) { 6778 PetscInt mrows,mcols,mnrows,mncols; 6779 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6780 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6781 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6782 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6783 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6784 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6785 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6786 } 6787 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6788 PetscValidLogicalCollectiveInt(mat,bs,0); 6789 6790 /* prepare IS for sending if not provided */ 6791 if (!is_sends) { 6792 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6793 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6794 } else { 6795 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6796 is_sends_internal = is_sends; 6797 } 6798 6799 /* get comm */ 6800 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6801 6802 /* compute number of sends */ 6803 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6804 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6805 6806 /* compute number of receives */ 6807 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6808 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6809 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6810 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6811 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6812 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6813 ierr = PetscFree(iflags);CHKERRQ(ierr); 6814 6815 /* restrict comm if requested */ 6816 subcomm = 0; 6817 destroy_mat = PETSC_FALSE; 6818 if (restrict_comm) { 6819 PetscMPIInt color,subcommsize; 6820 6821 color = 0; 6822 if (restrict_full) { 6823 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6824 } else { 6825 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6826 } 6827 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6828 subcommsize = commsize - subcommsize; 6829 /* check if reuse has been requested */ 6830 if (reuse) { 6831 if (*mat_n) { 6832 PetscMPIInt subcommsize2; 6833 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6834 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6835 comm_n = PetscObjectComm((PetscObject)*mat_n); 6836 } else { 6837 comm_n = PETSC_COMM_SELF; 6838 } 6839 } else { /* MAT_INITIAL_MATRIX */ 6840 PetscMPIInt rank; 6841 6842 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6843 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6844 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6845 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6846 comm_n = PetscSubcommChild(subcomm); 6847 } 6848 /* flag to destroy *mat_n if not significative */ 6849 if (color) destroy_mat = PETSC_TRUE; 6850 } else { 6851 comm_n = comm; 6852 } 6853 6854 /* prepare send/receive buffers */ 6855 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6856 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6857 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6858 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6859 if (nis) { 6860 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6861 } 6862 6863 /* Get data from local matrices */ 6864 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6865 /* TODO: See below some guidelines on how to prepare the local buffers */ 6866 /* 6867 send_buffer_vals should contain the raw values of the local matrix 6868 send_buffer_idxs should contain: 6869 - MatType_PRIVATE type 6870 - PetscInt size_of_l2gmap 6871 - PetscInt global_row_indices[size_of_l2gmap] 6872 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6873 */ 6874 else { 6875 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6876 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6877 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6878 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6879 send_buffer_idxs[1] = i; 6880 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6881 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6882 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6883 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6884 for (i=0;i<n_sends;i++) { 6885 ilengths_vals[is_indices[i]] = len*len; 6886 ilengths_idxs[is_indices[i]] = len+2; 6887 } 6888 } 6889 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6890 /* additional is (if any) */ 6891 if (nis) { 6892 PetscMPIInt psum; 6893 PetscInt j; 6894 for (j=0,psum=0;j<nis;j++) { 6895 PetscInt plen; 6896 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6897 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6898 psum += len+1; /* indices + lenght */ 6899 } 6900 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6901 for (j=0,psum=0;j<nis;j++) { 6902 PetscInt plen; 6903 const PetscInt *is_array_idxs; 6904 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6905 send_buffer_idxs_is[psum] = plen; 6906 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6907 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6908 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6909 psum += plen+1; /* indices + lenght */ 6910 } 6911 for (i=0;i<n_sends;i++) { 6912 ilengths_idxs_is[is_indices[i]] = psum; 6913 } 6914 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6915 } 6916 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 6917 6918 buf_size_idxs = 0; 6919 buf_size_vals = 0; 6920 buf_size_idxs_is = 0; 6921 buf_size_vecs = 0; 6922 for (i=0;i<n_recvs;i++) { 6923 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6924 buf_size_vals += (PetscInt)olengths_vals[i]; 6925 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6926 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6927 } 6928 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6929 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6930 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6931 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6932 6933 /* get new tags for clean communications */ 6934 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6935 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6936 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6937 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6938 6939 /* allocate for requests */ 6940 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6941 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6942 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6943 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6944 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6945 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6946 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6947 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6948 6949 /* communications */ 6950 ptr_idxs = recv_buffer_idxs; 6951 ptr_vals = recv_buffer_vals; 6952 ptr_idxs_is = recv_buffer_idxs_is; 6953 ptr_vecs = recv_buffer_vecs; 6954 for (i=0;i<n_recvs;i++) { 6955 source_dest = onodes[i]; 6956 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6957 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6958 ptr_idxs += olengths_idxs[i]; 6959 ptr_vals += olengths_vals[i]; 6960 if (nis) { 6961 source_dest = onodes_is[i]; 6962 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6963 ptr_idxs_is += olengths_idxs_is[i]; 6964 } 6965 if (nvecs) { 6966 source_dest = onodes[i]; 6967 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6968 ptr_vecs += olengths_idxs[i]-2; 6969 } 6970 } 6971 for (i=0;i<n_sends;i++) { 6972 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6973 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6974 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6975 if (nis) { 6976 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6977 } 6978 if (nvecs) { 6979 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6980 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6981 } 6982 } 6983 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6984 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6985 6986 /* assemble new l2g map */ 6987 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6988 ptr_idxs = recv_buffer_idxs; 6989 new_local_rows = 0; 6990 for (i=0;i<n_recvs;i++) { 6991 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6992 ptr_idxs += olengths_idxs[i]; 6993 } 6994 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6995 ptr_idxs = recv_buffer_idxs; 6996 new_local_rows = 0; 6997 for (i=0;i<n_recvs;i++) { 6998 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6999 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7000 ptr_idxs += olengths_idxs[i]; 7001 } 7002 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7003 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7004 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7005 7006 /* infer new local matrix type from received local matrices type */ 7007 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7008 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7009 if (n_recvs) { 7010 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7011 ptr_idxs = recv_buffer_idxs; 7012 for (i=0;i<n_recvs;i++) { 7013 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7014 new_local_type_private = MATAIJ_PRIVATE; 7015 break; 7016 } 7017 ptr_idxs += olengths_idxs[i]; 7018 } 7019 switch (new_local_type_private) { 7020 case MATDENSE_PRIVATE: 7021 new_local_type = MATSEQAIJ; 7022 bs = 1; 7023 break; 7024 case MATAIJ_PRIVATE: 7025 new_local_type = MATSEQAIJ; 7026 bs = 1; 7027 break; 7028 case MATBAIJ_PRIVATE: 7029 new_local_type = MATSEQBAIJ; 7030 break; 7031 case MATSBAIJ_PRIVATE: 7032 new_local_type = MATSEQSBAIJ; 7033 break; 7034 default: 7035 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7036 break; 7037 } 7038 } else { /* by default, new_local_type is seqaij */ 7039 new_local_type = MATSEQAIJ; 7040 bs = 1; 7041 } 7042 7043 /* create MATIS object if needed */ 7044 if (!reuse) { 7045 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7046 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7047 } else { 7048 /* it also destroys the local matrices */ 7049 if (*mat_n) { 7050 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7051 } else { /* this is a fake object */ 7052 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7053 } 7054 } 7055 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7056 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7057 7058 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7059 7060 /* Global to local map of received indices */ 7061 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7062 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7063 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7064 7065 /* restore attributes -> type of incoming data and its size */ 7066 buf_size_idxs = 0; 7067 for (i=0;i<n_recvs;i++) { 7068 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7069 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7070 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7071 } 7072 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7073 7074 /* set preallocation */ 7075 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7076 if (!newisdense) { 7077 PetscInt *new_local_nnz=0; 7078 7079 ptr_idxs = recv_buffer_idxs_local; 7080 if (n_recvs) { 7081 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7082 } 7083 for (i=0;i<n_recvs;i++) { 7084 PetscInt j; 7085 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7086 for (j=0;j<*(ptr_idxs+1);j++) { 7087 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7088 } 7089 } else { 7090 /* TODO */ 7091 } 7092 ptr_idxs += olengths_idxs[i]; 7093 } 7094 if (new_local_nnz) { 7095 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7096 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7097 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7098 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7099 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7100 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7101 } else { 7102 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7103 } 7104 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7105 } else { 7106 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7107 } 7108 7109 /* set values */ 7110 ptr_vals = recv_buffer_vals; 7111 ptr_idxs = recv_buffer_idxs_local; 7112 for (i=0;i<n_recvs;i++) { 7113 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7114 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7115 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7116 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7117 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7118 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7119 } else { 7120 /* TODO */ 7121 } 7122 ptr_idxs += olengths_idxs[i]; 7123 ptr_vals += olengths_vals[i]; 7124 } 7125 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7126 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7127 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7128 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7129 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7130 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7131 7132 #if 0 7133 if (!restrict_comm) { /* check */ 7134 Vec lvec,rvec; 7135 PetscReal infty_error; 7136 7137 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7138 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7139 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7140 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7141 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7142 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7143 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7144 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7145 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7146 } 7147 #endif 7148 7149 /* assemble new additional is (if any) */ 7150 if (nis) { 7151 PetscInt **temp_idxs,*count_is,j,psum; 7152 7153 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7154 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7155 ptr_idxs = recv_buffer_idxs_is; 7156 psum = 0; 7157 for (i=0;i<n_recvs;i++) { 7158 for (j=0;j<nis;j++) { 7159 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7160 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7161 psum += plen; 7162 ptr_idxs += plen+1; /* shift pointer to received data */ 7163 } 7164 } 7165 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7166 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7167 for (i=1;i<nis;i++) { 7168 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7169 } 7170 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7171 ptr_idxs = recv_buffer_idxs_is; 7172 for (i=0;i<n_recvs;i++) { 7173 for (j=0;j<nis;j++) { 7174 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7175 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7176 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7177 ptr_idxs += plen+1; /* shift pointer to received data */ 7178 } 7179 } 7180 for (i=0;i<nis;i++) { 7181 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7182 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7183 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7184 } 7185 ierr = PetscFree(count_is);CHKERRQ(ierr); 7186 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7187 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7188 } 7189 /* free workspace */ 7190 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7191 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7192 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7193 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7194 if (isdense) { 7195 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7196 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7197 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7198 } else { 7199 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7200 } 7201 if (nis) { 7202 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7203 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7204 } 7205 7206 if (nvecs) { 7207 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7208 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7209 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7210 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7211 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7212 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7213 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7214 /* set values */ 7215 ptr_vals = recv_buffer_vecs; 7216 ptr_idxs = recv_buffer_idxs_local; 7217 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7218 for (i=0;i<n_recvs;i++) { 7219 PetscInt j; 7220 for (j=0;j<*(ptr_idxs+1);j++) { 7221 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7222 } 7223 ptr_idxs += olengths_idxs[i]; 7224 ptr_vals += olengths_idxs[i]-2; 7225 } 7226 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7227 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7228 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7229 } 7230 7231 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7232 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7233 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7234 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7235 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7236 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7237 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7238 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7239 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7240 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7241 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7242 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7243 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7244 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7245 ierr = PetscFree(onodes);CHKERRQ(ierr); 7246 if (nis) { 7247 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7248 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7249 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7250 } 7251 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7252 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7253 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7254 for (i=0;i<nis;i++) { 7255 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7256 } 7257 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7258 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7259 } 7260 *mat_n = NULL; 7261 } 7262 PetscFunctionReturn(0); 7263 } 7264 7265 /* temporary hack into ksp private data structure */ 7266 #include <petsc/private/kspimpl.h> 7267 7268 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7269 { 7270 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7271 PC_IS *pcis = (PC_IS*)pc->data; 7272 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7273 Mat coarsedivudotp = NULL; 7274 Mat coarseG,t_coarse_mat_is; 7275 MatNullSpace CoarseNullSpace = NULL; 7276 ISLocalToGlobalMapping coarse_islg; 7277 IS coarse_is,*isarray; 7278 PetscInt i,im_active=-1,active_procs=-1; 7279 PetscInt nis,nisdofs,nisneu,nisvert; 7280 PC pc_temp; 7281 PCType coarse_pc_type; 7282 KSPType coarse_ksp_type; 7283 PetscBool multilevel_requested,multilevel_allowed; 7284 PetscBool isredundant,isbddc,isnn,coarse_reuse; 7285 PetscInt ncoarse,nedcfield; 7286 PetscBool compute_vecs = PETSC_FALSE; 7287 PetscScalar *array; 7288 MatReuse coarse_mat_reuse; 7289 PetscBool restr, full_restr, have_void; 7290 PetscMPIInt commsize; 7291 PetscErrorCode ierr; 7292 7293 PetscFunctionBegin; 7294 /* Assign global numbering to coarse dofs */ 7295 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7296 PetscInt ocoarse_size; 7297 compute_vecs = PETSC_TRUE; 7298 7299 pcbddc->new_primal_space = PETSC_TRUE; 7300 ocoarse_size = pcbddc->coarse_size; 7301 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7302 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7303 /* see if we can avoid some work */ 7304 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7305 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7306 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7307 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7308 coarse_reuse = PETSC_FALSE; 7309 } else { /* we can safely reuse already computed coarse matrix */ 7310 coarse_reuse = PETSC_TRUE; 7311 } 7312 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7313 coarse_reuse = PETSC_FALSE; 7314 } 7315 /* reset any subassembling information */ 7316 if (!coarse_reuse || pcbddc->recompute_topography) { 7317 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7318 } 7319 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7320 coarse_reuse = PETSC_TRUE; 7321 } 7322 /* assemble coarse matrix */ 7323 if (coarse_reuse && pcbddc->coarse_ksp) { 7324 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7325 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7326 coarse_mat_reuse = MAT_REUSE_MATRIX; 7327 } else { 7328 coarse_mat = NULL; 7329 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7330 } 7331 7332 /* creates temporary l2gmap and IS for coarse indexes */ 7333 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7334 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7335 7336 /* creates temporary MATIS object for coarse matrix */ 7337 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7338 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7339 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7340 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7341 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7342 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7343 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7344 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7345 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7346 7347 /* count "active" (i.e. with positive local size) and "void" processes */ 7348 im_active = !!(pcis->n); 7349 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7350 7351 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7352 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7353 /* full_restr : just use the receivers from the subassembling pattern */ 7354 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7355 coarse_mat_is = NULL; 7356 multilevel_allowed = PETSC_FALSE; 7357 multilevel_requested = PETSC_FALSE; 7358 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7359 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7360 if (multilevel_requested) { 7361 ncoarse = active_procs/pcbddc->coarsening_ratio; 7362 restr = PETSC_FALSE; 7363 full_restr = PETSC_FALSE; 7364 } else { 7365 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7366 restr = PETSC_TRUE; 7367 full_restr = PETSC_TRUE; 7368 } 7369 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7370 ncoarse = PetscMax(1,ncoarse); 7371 if (!pcbddc->coarse_subassembling) { 7372 if (pcbddc->coarsening_ratio > 1) { 7373 if (multilevel_requested) { 7374 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7375 } else { 7376 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7377 } 7378 } else { 7379 PetscMPIInt rank; 7380 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7381 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7382 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7383 } 7384 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7385 PetscInt psum; 7386 if (pcbddc->coarse_ksp) psum = 1; 7387 else psum = 0; 7388 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7389 if (ncoarse < commsize) have_void = PETSC_TRUE; 7390 } 7391 /* determine if we can go multilevel */ 7392 if (multilevel_requested) { 7393 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7394 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7395 } 7396 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7397 7398 /* dump subassembling pattern */ 7399 if (pcbddc->dbg_flag && multilevel_allowed) { 7400 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7401 } 7402 7403 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7404 nedcfield = -1; 7405 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7406 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7407 const PetscInt *idxs; 7408 ISLocalToGlobalMapping tmap; 7409 7410 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7411 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7412 /* allocate space for temporary storage */ 7413 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7414 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7415 /* allocate for IS array */ 7416 nisdofs = pcbddc->n_ISForDofsLocal; 7417 if (pcbddc->nedclocal) { 7418 if (pcbddc->nedfield > -1) { 7419 nedcfield = pcbddc->nedfield; 7420 } else { 7421 nedcfield = 0; 7422 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7423 nisdofs = 1; 7424 } 7425 } 7426 nisneu = !!pcbddc->NeumannBoundariesLocal; 7427 nisvert = 0; /* nisvert is not used */ 7428 nis = nisdofs + nisneu + nisvert; 7429 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7430 /* dofs splitting */ 7431 for (i=0;i<nisdofs;i++) { 7432 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7433 if (nedcfield != i) { 7434 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7435 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7436 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7437 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7438 } else { 7439 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7440 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7441 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7442 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7443 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7444 } 7445 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7446 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7447 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7448 } 7449 /* neumann boundaries */ 7450 if (pcbddc->NeumannBoundariesLocal) { 7451 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7452 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7453 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7454 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7455 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7456 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7457 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7458 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7459 } 7460 /* free memory */ 7461 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7462 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7463 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7464 } else { 7465 nis = 0; 7466 nisdofs = 0; 7467 nisneu = 0; 7468 nisvert = 0; 7469 isarray = NULL; 7470 } 7471 /* destroy no longer needed map */ 7472 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7473 7474 /* subassemble */ 7475 if (multilevel_allowed) { 7476 Vec vp[1]; 7477 PetscInt nvecs = 0; 7478 PetscBool reuse,reuser; 7479 7480 if (coarse_mat) reuse = PETSC_TRUE; 7481 else reuse = PETSC_FALSE; 7482 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7483 vp[0] = NULL; 7484 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7485 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7486 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7487 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7488 nvecs = 1; 7489 7490 if (pcbddc->divudotp) { 7491 Mat B,loc_divudotp; 7492 Vec v,p; 7493 IS dummy; 7494 PetscInt np; 7495 7496 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7497 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7498 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7499 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7500 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7501 ierr = VecSet(p,1.);CHKERRQ(ierr); 7502 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7503 ierr = VecDestroy(&p);CHKERRQ(ierr); 7504 ierr = MatDestroy(&B);CHKERRQ(ierr); 7505 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7506 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7507 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7508 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7509 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7510 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7511 ierr = VecDestroy(&v);CHKERRQ(ierr); 7512 } 7513 } 7514 if (reuser) { 7515 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7516 } else { 7517 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7518 } 7519 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7520 PetscScalar *arraym,*arrayv; 7521 PetscInt nl; 7522 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7523 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7524 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7525 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7526 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7527 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7528 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7529 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7530 } else { 7531 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7532 } 7533 } else { 7534 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7535 } 7536 if (coarse_mat_is || coarse_mat) { 7537 PetscMPIInt size; 7538 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7539 if (!multilevel_allowed) { 7540 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7541 } else { 7542 Mat A; 7543 7544 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7545 if (coarse_mat_is) { 7546 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7547 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7548 coarse_mat = coarse_mat_is; 7549 } 7550 /* be sure we don't have MatSeqDENSE as local mat */ 7551 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7552 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7553 } 7554 } 7555 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7556 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7557 7558 /* create local to global scatters for coarse problem */ 7559 if (compute_vecs) { 7560 PetscInt lrows; 7561 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7562 if (coarse_mat) { 7563 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7564 } else { 7565 lrows = 0; 7566 } 7567 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7568 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7569 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7570 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7571 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7572 } 7573 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7574 7575 /* set defaults for coarse KSP and PC */ 7576 if (multilevel_allowed) { 7577 coarse_ksp_type = KSPRICHARDSON; 7578 coarse_pc_type = PCBDDC; 7579 } else { 7580 coarse_ksp_type = KSPPREONLY; 7581 coarse_pc_type = PCREDUNDANT; 7582 } 7583 7584 /* print some info if requested */ 7585 if (pcbddc->dbg_flag) { 7586 if (!multilevel_allowed) { 7587 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7588 if (multilevel_requested) { 7589 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7590 } else if (pcbddc->max_levels) { 7591 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7592 } 7593 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7594 } 7595 } 7596 7597 /* communicate coarse discrete gradient */ 7598 coarseG = NULL; 7599 if (pcbddc->nedcG && multilevel_allowed) { 7600 MPI_Comm ccomm; 7601 if (coarse_mat) { 7602 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7603 } else { 7604 ccomm = MPI_COMM_NULL; 7605 } 7606 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7607 } 7608 7609 /* create the coarse KSP object only once with defaults */ 7610 if (coarse_mat) { 7611 PetscViewer dbg_viewer = NULL; 7612 if (pcbddc->dbg_flag) { 7613 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7614 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7615 } 7616 if (!pcbddc->coarse_ksp) { 7617 char prefix[256],str_level[16]; 7618 size_t len; 7619 7620 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7621 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7622 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7623 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7624 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7625 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7626 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7627 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7628 /* TODO is this logic correct? should check for coarse_mat type */ 7629 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7630 /* prefix */ 7631 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7632 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7633 if (!pcbddc->current_level) { 7634 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7635 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7636 } else { 7637 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7638 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7639 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7640 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7641 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7642 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7643 } 7644 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7645 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7646 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7647 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7648 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7649 /* allow user customization */ 7650 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7651 } 7652 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7653 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7654 if (nisdofs) { 7655 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7656 for (i=0;i<nisdofs;i++) { 7657 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7658 } 7659 } 7660 if (nisneu) { 7661 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7662 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7663 } 7664 if (nisvert) { 7665 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7666 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7667 } 7668 if (coarseG) { 7669 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7670 } 7671 7672 /* get some info after set from options */ 7673 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7674 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7675 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7676 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 7677 if (isbddc && !multilevel_allowed) { 7678 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7679 isbddc = PETSC_FALSE; 7680 } 7681 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 7682 if (multilevel_requested && !isbddc && !isnn) { 7683 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 7684 isbddc = PETSC_TRUE; 7685 isnn = PETSC_FALSE; 7686 } 7687 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7688 if (isredundant) { 7689 KSP inner_ksp; 7690 PC inner_pc; 7691 7692 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7693 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7694 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7695 } 7696 7697 /* parameters which miss an API */ 7698 if (isbddc) { 7699 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7700 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7701 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7702 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7703 if (pcbddc_coarse->benign_saddle_point) { 7704 Mat coarsedivudotp_is; 7705 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7706 IS row,col; 7707 const PetscInt *gidxs; 7708 PetscInt n,st,M,N; 7709 7710 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7711 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7712 st = st-n; 7713 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7714 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7715 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7716 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7717 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7718 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7719 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7720 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7721 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7722 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7723 ierr = ISDestroy(&row);CHKERRQ(ierr); 7724 ierr = ISDestroy(&col);CHKERRQ(ierr); 7725 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7726 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7727 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7728 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7729 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7730 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7731 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7732 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7733 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7734 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7735 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7736 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7737 } 7738 } 7739 7740 /* propagate symmetry info of coarse matrix */ 7741 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7742 if (pc->pmat->symmetric_set) { 7743 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7744 } 7745 if (pc->pmat->hermitian_set) { 7746 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7747 } 7748 if (pc->pmat->spd_set) { 7749 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7750 } 7751 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7752 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7753 } 7754 /* set operators */ 7755 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7756 if (pcbddc->dbg_flag) { 7757 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7758 } 7759 } 7760 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 7761 ierr = PetscFree(isarray);CHKERRQ(ierr); 7762 #if 0 7763 { 7764 PetscViewer viewer; 7765 char filename[256]; 7766 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7767 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7768 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7769 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7770 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7771 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7772 } 7773 #endif 7774 7775 if (pcbddc->coarse_ksp) { 7776 Vec crhs,csol; 7777 7778 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7779 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7780 if (!csol) { 7781 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7782 } 7783 if (!crhs) { 7784 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7785 } 7786 } 7787 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7788 7789 /* compute null space for coarse solver if the benign trick has been requested */ 7790 if (pcbddc->benign_null) { 7791 7792 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7793 for (i=0;i<pcbddc->benign_n;i++) { 7794 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7795 } 7796 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7797 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7798 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7799 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7800 if (coarse_mat) { 7801 Vec nullv; 7802 PetscScalar *array,*array2; 7803 PetscInt nl; 7804 7805 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7806 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7807 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7808 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7809 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7810 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7811 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7812 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7813 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7814 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7815 } 7816 } 7817 7818 if (pcbddc->coarse_ksp) { 7819 PetscBool ispreonly; 7820 7821 if (CoarseNullSpace) { 7822 PetscBool isnull; 7823 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7824 if (isnull) { 7825 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7826 } 7827 /* TODO: add local nullspaces (if any) */ 7828 } 7829 /* setup coarse ksp */ 7830 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7831 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7832 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7833 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7834 KSP check_ksp; 7835 KSPType check_ksp_type; 7836 PC check_pc; 7837 Vec check_vec,coarse_vec; 7838 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7839 PetscInt its; 7840 PetscBool compute_eigs; 7841 PetscReal *eigs_r,*eigs_c; 7842 PetscInt neigs; 7843 const char *prefix; 7844 7845 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7846 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7847 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7848 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7849 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7850 /* prevent from setup unneeded object */ 7851 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7852 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7853 if (ispreonly) { 7854 check_ksp_type = KSPPREONLY; 7855 compute_eigs = PETSC_FALSE; 7856 } else { 7857 check_ksp_type = KSPGMRES; 7858 compute_eigs = PETSC_TRUE; 7859 } 7860 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7861 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7862 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7863 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7864 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7865 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7866 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7867 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7868 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7869 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7870 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7871 /* create random vec */ 7872 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7873 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7874 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7875 /* solve coarse problem */ 7876 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7877 /* set eigenvalue estimation if preonly has not been requested */ 7878 if (compute_eigs) { 7879 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7880 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7881 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7882 if (neigs) { 7883 lambda_max = eigs_r[neigs-1]; 7884 lambda_min = eigs_r[0]; 7885 if (pcbddc->use_coarse_estimates) { 7886 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7887 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7888 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7889 } 7890 } 7891 } 7892 } 7893 7894 /* check coarse problem residual error */ 7895 if (pcbddc->dbg_flag) { 7896 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7897 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7898 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7899 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7900 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7901 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7902 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7903 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7904 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7905 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7906 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7907 if (CoarseNullSpace) { 7908 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7909 } 7910 if (compute_eigs) { 7911 PetscReal lambda_max_s,lambda_min_s; 7912 KSPConvergedReason reason; 7913 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7914 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7915 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 7916 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7917 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7918 for (i=0;i<neigs;i++) { 7919 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7920 } 7921 } 7922 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7923 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7924 } 7925 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7926 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7927 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7928 if (compute_eigs) { 7929 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7930 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7931 } 7932 } 7933 } 7934 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7935 /* print additional info */ 7936 if (pcbddc->dbg_flag) { 7937 /* waits until all processes reaches this point */ 7938 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7939 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7940 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7941 } 7942 7943 /* free memory */ 7944 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7945 PetscFunctionReturn(0); 7946 } 7947 7948 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7949 { 7950 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7951 PC_IS* pcis = (PC_IS*)pc->data; 7952 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7953 IS subset,subset_mult,subset_n; 7954 PetscInt local_size,coarse_size=0; 7955 PetscInt *local_primal_indices=NULL; 7956 const PetscInt *t_local_primal_indices; 7957 PetscErrorCode ierr; 7958 7959 PetscFunctionBegin; 7960 /* Compute global number of coarse dofs */ 7961 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7962 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7963 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7964 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7965 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7966 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7967 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7968 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7969 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7970 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7971 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7972 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7973 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7974 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7975 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7976 7977 /* check numbering */ 7978 if (pcbddc->dbg_flag) { 7979 PetscScalar coarsesum,*array,*array2; 7980 PetscInt i; 7981 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7982 7983 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7984 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7985 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7986 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7987 /* counter */ 7988 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7989 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7990 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7991 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7992 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7993 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7994 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7995 for (i=0;i<pcbddc->local_primal_size;i++) { 7996 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7997 } 7998 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7999 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8000 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8001 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8002 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8003 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8004 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8005 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8006 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8007 for (i=0;i<pcis->n;i++) { 8008 if (array[i] != 0.0 && array[i] != array2[i]) { 8009 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8010 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8011 set_error = PETSC_TRUE; 8012 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8013 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8014 } 8015 } 8016 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8017 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8018 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8019 for (i=0;i<pcis->n;i++) { 8020 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8021 } 8022 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8023 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8024 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8025 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8026 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8027 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8028 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8029 PetscInt *gidxs; 8030 8031 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8032 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8033 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8034 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8035 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8036 for (i=0;i<pcbddc->local_primal_size;i++) { 8037 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8038 } 8039 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8040 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8041 } 8042 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8043 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8044 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8045 } 8046 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8047 /* get back data */ 8048 *coarse_size_n = coarse_size; 8049 *local_primal_indices_n = local_primal_indices; 8050 PetscFunctionReturn(0); 8051 } 8052 8053 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8054 { 8055 IS localis_t; 8056 PetscInt i,lsize,*idxs,n; 8057 PetscScalar *vals; 8058 PetscErrorCode ierr; 8059 8060 PetscFunctionBegin; 8061 /* get indices in local ordering exploiting local to global map */ 8062 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8063 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8064 for (i=0;i<lsize;i++) vals[i] = 1.0; 8065 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8066 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8067 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8068 if (idxs) { /* multilevel guard */ 8069 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8070 } 8071 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8072 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8073 ierr = PetscFree(vals);CHKERRQ(ierr); 8074 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8075 /* now compute set in local ordering */ 8076 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8077 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8078 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8079 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8080 for (i=0,lsize=0;i<n;i++) { 8081 if (PetscRealPart(vals[i]) > 0.5) { 8082 lsize++; 8083 } 8084 } 8085 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8086 for (i=0,lsize=0;i<n;i++) { 8087 if (PetscRealPart(vals[i]) > 0.5) { 8088 idxs[lsize++] = i; 8089 } 8090 } 8091 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8092 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8093 *localis = localis_t; 8094 PetscFunctionReturn(0); 8095 } 8096 8097 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8098 { 8099 PC_IS *pcis=(PC_IS*)pc->data; 8100 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8101 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8102 Mat S_j; 8103 PetscInt *used_xadj,*used_adjncy; 8104 PetscBool free_used_adj; 8105 PetscErrorCode ierr; 8106 8107 PetscFunctionBegin; 8108 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8109 free_used_adj = PETSC_FALSE; 8110 if (pcbddc->sub_schurs_layers == -1) { 8111 used_xadj = NULL; 8112 used_adjncy = NULL; 8113 } else { 8114 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8115 used_xadj = pcbddc->mat_graph->xadj; 8116 used_adjncy = pcbddc->mat_graph->adjncy; 8117 } else if (pcbddc->computed_rowadj) { 8118 used_xadj = pcbddc->mat_graph->xadj; 8119 used_adjncy = pcbddc->mat_graph->adjncy; 8120 } else { 8121 PetscBool flg_row=PETSC_FALSE; 8122 const PetscInt *xadj,*adjncy; 8123 PetscInt nvtxs; 8124 8125 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8126 if (flg_row) { 8127 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8128 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8129 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8130 free_used_adj = PETSC_TRUE; 8131 } else { 8132 pcbddc->sub_schurs_layers = -1; 8133 used_xadj = NULL; 8134 used_adjncy = NULL; 8135 } 8136 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8137 } 8138 } 8139 8140 /* setup sub_schurs data */ 8141 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8142 if (!sub_schurs->schur_explicit) { 8143 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8144 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8145 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8146 } else { 8147 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8148 PetscBool isseqaij,need_change = PETSC_FALSE; 8149 PetscInt benign_n; 8150 Mat change = NULL; 8151 Vec scaling = NULL; 8152 IS change_primal = NULL; 8153 8154 if (!pcbddc->use_vertices && reuse_solvers) { 8155 PetscInt n_vertices; 8156 8157 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8158 reuse_solvers = (PetscBool)!n_vertices; 8159 } 8160 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8161 if (!isseqaij) { 8162 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8163 if (matis->A == pcbddc->local_mat) { 8164 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8165 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8166 } else { 8167 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8168 } 8169 } 8170 if (!pcbddc->benign_change_explicit) { 8171 benign_n = pcbddc->benign_n; 8172 } else { 8173 benign_n = 0; 8174 } 8175 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8176 We need a global reduction to avoid possible deadlocks. 8177 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8178 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8179 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8180 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8181 need_change = (PetscBool)(!need_change); 8182 } 8183 /* If the user defines additional constraints, we import them here. 8184 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8185 if (need_change) { 8186 PC_IS *pcisf; 8187 PC_BDDC *pcbddcf; 8188 PC pcf; 8189 8190 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8191 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8192 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8193 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8194 8195 /* hacks */ 8196 pcisf = (PC_IS*)pcf->data; 8197 pcisf->is_B_local = pcis->is_B_local; 8198 pcisf->vec1_N = pcis->vec1_N; 8199 pcisf->BtoNmap = pcis->BtoNmap; 8200 pcisf->n = pcis->n; 8201 pcisf->n_B = pcis->n_B; 8202 pcbddcf = (PC_BDDC*)pcf->data; 8203 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8204 pcbddcf->mat_graph = pcbddc->mat_graph; 8205 pcbddcf->use_faces = PETSC_TRUE; 8206 pcbddcf->use_change_of_basis = PETSC_TRUE; 8207 pcbddcf->use_change_on_faces = PETSC_TRUE; 8208 pcbddcf->use_qr_single = PETSC_TRUE; 8209 pcbddcf->fake_change = PETSC_TRUE; 8210 8211 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8212 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8213 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8214 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8215 change = pcbddcf->ConstraintMatrix; 8216 pcbddcf->ConstraintMatrix = NULL; 8217 8218 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8219 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8220 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8221 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8222 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8223 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8224 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8225 pcf->ops->destroy = NULL; 8226 pcf->ops->reset = NULL; 8227 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8228 } 8229 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8230 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8231 ierr = MatDestroy(&change);CHKERRQ(ierr); 8232 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8233 } 8234 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8235 8236 /* free adjacency */ 8237 if (free_used_adj) { 8238 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8239 } 8240 PetscFunctionReturn(0); 8241 } 8242 8243 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8244 { 8245 PC_IS *pcis=(PC_IS*)pc->data; 8246 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8247 PCBDDCGraph graph; 8248 PetscErrorCode ierr; 8249 8250 PetscFunctionBegin; 8251 /* attach interface graph for determining subsets */ 8252 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8253 IS verticesIS,verticescomm; 8254 PetscInt vsize,*idxs; 8255 8256 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8257 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8258 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8259 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8260 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8261 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8262 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8263 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8264 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8265 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8266 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8267 } else { 8268 graph = pcbddc->mat_graph; 8269 } 8270 /* print some info */ 8271 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8272 IS vertices; 8273 PetscInt nv,nedges,nfaces; 8274 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8275 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8276 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8277 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8278 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8279 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8280 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8281 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8282 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8283 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8284 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8285 } 8286 8287 /* sub_schurs init */ 8288 if (!pcbddc->sub_schurs) { 8289 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8290 } 8291 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8292 pcbddc->sub_schurs->prefix = ((PetscObject)pc)->prefix; 8293 8294 /* free graph struct */ 8295 if (pcbddc->sub_schurs_rebuild) { 8296 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8297 } 8298 PetscFunctionReturn(0); 8299 } 8300 8301 PetscErrorCode PCBDDCCheckOperator(PC pc) 8302 { 8303 PC_IS *pcis=(PC_IS*)pc->data; 8304 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8305 PetscErrorCode ierr; 8306 8307 PetscFunctionBegin; 8308 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8309 IS zerodiag = NULL; 8310 Mat S_j,B0_B=NULL; 8311 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8312 PetscScalar *p0_check,*array,*array2; 8313 PetscReal norm; 8314 PetscInt i; 8315 8316 /* B0 and B0_B */ 8317 if (zerodiag) { 8318 IS dummy; 8319 8320 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8321 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8322 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8323 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8324 } 8325 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8326 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8327 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8328 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8329 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8330 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8331 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8332 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8333 /* S_j */ 8334 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8335 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8336 8337 /* mimic vector in \widetilde{W}_\Gamma */ 8338 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8339 /* continuous in primal space */ 8340 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8341 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8342 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8343 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8344 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8345 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8346 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8347 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8348 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8349 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8350 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8351 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8352 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8353 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8354 8355 /* assemble rhs for coarse problem */ 8356 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8357 /* local with Schur */ 8358 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8359 if (zerodiag) { 8360 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8361 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8362 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8363 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8364 } 8365 /* sum on primal nodes the local contributions */ 8366 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8367 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8368 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8369 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8370 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8371 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8372 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8373 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8374 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8375 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8376 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8377 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8378 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8379 /* scale primal nodes (BDDC sums contibutions) */ 8380 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8381 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8382 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8383 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8384 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8385 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8386 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8387 /* global: \widetilde{B0}_B w_\Gamma */ 8388 if (zerodiag) { 8389 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8390 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8391 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8392 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8393 } 8394 /* BDDC */ 8395 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8396 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8397 8398 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8399 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8400 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8401 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8402 for (i=0;i<pcbddc->benign_n;i++) { 8403 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8404 } 8405 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8406 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8407 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8408 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8409 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8410 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8411 } 8412 PetscFunctionReturn(0); 8413 } 8414 8415 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8416 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8417 { 8418 Mat At; 8419 IS rows; 8420 PetscInt rst,ren; 8421 PetscErrorCode ierr; 8422 PetscLayout rmap; 8423 8424 PetscFunctionBegin; 8425 rst = ren = 0; 8426 if (ccomm != MPI_COMM_NULL) { 8427 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8428 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8429 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8430 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8431 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8432 } 8433 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8434 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8435 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8436 8437 if (ccomm != MPI_COMM_NULL) { 8438 Mat_MPIAIJ *a,*b; 8439 IS from,to; 8440 Vec gvec; 8441 PetscInt lsize; 8442 8443 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8444 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8445 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8446 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8447 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8448 a = (Mat_MPIAIJ*)At->data; 8449 b = (Mat_MPIAIJ*)(*B)->data; 8450 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8451 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8452 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8453 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8454 b->A = a->A; 8455 b->B = a->B; 8456 8457 b->donotstash = a->donotstash; 8458 b->roworiented = a->roworiented; 8459 b->rowindices = 0; 8460 b->rowvalues = 0; 8461 b->getrowactive = PETSC_FALSE; 8462 8463 (*B)->rmap = rmap; 8464 (*B)->factortype = A->factortype; 8465 (*B)->assembled = PETSC_TRUE; 8466 (*B)->insertmode = NOT_SET_VALUES; 8467 (*B)->preallocated = PETSC_TRUE; 8468 8469 if (a->colmap) { 8470 #if defined(PETSC_USE_CTABLE) 8471 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8472 #else 8473 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8474 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8475 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8476 #endif 8477 } else b->colmap = 0; 8478 if (a->garray) { 8479 PetscInt len; 8480 len = a->B->cmap->n; 8481 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8482 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8483 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8484 } else b->garray = 0; 8485 8486 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8487 b->lvec = a->lvec; 8488 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8489 8490 /* cannot use VecScatterCopy */ 8491 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8492 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8493 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8494 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8495 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8496 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8497 ierr = ISDestroy(&from);CHKERRQ(ierr); 8498 ierr = ISDestroy(&to);CHKERRQ(ierr); 8499 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8500 } 8501 ierr = MatDestroy(&At);CHKERRQ(ierr); 8502 PetscFunctionReturn(0); 8503 } 8504