1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 8 9 /* if range is true, it returns B s.t. span{B} = range(A) 10 if range is false, it returns B s.t. range(B) _|_ range(A) */ 11 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 12 { 13 #if !defined(PETSC_USE_COMPLEX) 14 PetscScalar *uwork,*data,*U, ds = 0.; 15 PetscReal *sing; 16 PetscBLASInt bM,bN,lwork,lierr,di = 1; 17 PetscInt ulw,i,nr,nc,n; 18 PetscErrorCode ierr; 19 20 PetscFunctionBegin; 21 #if defined(PETSC_MISSING_LAPACK_GESVD) 22 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 23 #else 24 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 25 if (!nr || !nc) PetscFunctionReturn(0); 26 27 /* workspace */ 28 if (!work) { 29 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 30 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 31 } else { 32 ulw = lw; 33 uwork = work; 34 } 35 n = PetscMin(nr,nc); 36 if (!rwork) { 37 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 38 } else { 39 sing = rwork; 40 } 41 42 /* SVD */ 43 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 44 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 47 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 48 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 49 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 50 ierr = PetscFPTrapPop();CHKERRQ(ierr); 51 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 52 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 53 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 54 if (!rwork) { 55 ierr = PetscFree(sing);CHKERRQ(ierr); 56 } 57 if (!work) { 58 ierr = PetscFree(uwork);CHKERRQ(ierr); 59 } 60 /* create B */ 61 if (!range) { 62 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 63 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 65 } else { 66 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 67 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 68 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 69 } 70 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 71 ierr = PetscFree(U);CHKERRQ(ierr); 72 #endif 73 #else /* PETSC_USE_COMPLEX */ 74 PetscFunctionBegin; 75 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 76 #endif 77 PetscFunctionReturn(0); 78 } 79 80 /* TODO REMOVE */ 81 #if defined(PRINT_GDET) 82 static int inc = 0; 83 static int lev = 0; 84 #endif 85 86 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 87 { 88 PetscErrorCode ierr; 89 Mat GE,GEd; 90 PetscInt rsize,csize,esize; 91 PetscScalar *ptr; 92 93 PetscFunctionBegin; 94 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 95 if (!esize) PetscFunctionReturn(0); 96 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 97 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 98 99 /* gradients */ 100 ptr = work + 5*esize; 101 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 102 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 103 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 104 ierr = MatDestroy(&GE);CHKERRQ(ierr); 105 106 /* constants */ 107 ptr += rsize*csize; 108 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 109 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 110 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 111 ierr = MatDestroy(&GE);CHKERRQ(ierr); 112 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 113 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 114 115 if (corners) { 116 Mat GEc; 117 PetscScalar *vals,v; 118 119 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 120 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 121 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 122 /* v = PetscAbsScalar(vals[0]) */; 123 v = 1.; 124 cvals[0] = vals[0]/v; 125 cvals[1] = vals[1]/v; 126 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 127 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 128 #if defined(PRINT_GDET) 129 { 130 PetscViewer viewer; 131 char filename[256]; 132 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 133 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 134 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 135 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 136 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 137 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 138 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 140 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 141 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 142 } 143 #endif 144 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 145 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 146 } 147 148 PetscFunctionReturn(0); 149 } 150 151 PetscErrorCode PCBDDCNedelecSupport(PC pc) 152 { 153 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 154 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 155 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 156 Vec tvec; 157 PetscSF sfv; 158 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 159 MPI_Comm comm; 160 IS lned,primals,allprimals,nedfieldlocal; 161 IS *eedges,*extrows,*extcols,*alleedges; 162 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 163 PetscScalar *vals,*work; 164 PetscReal *rwork; 165 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 166 PetscInt ne,nv,Lv,order,n,field; 167 PetscInt n_neigh,*neigh,*n_shared,**shared; 168 PetscInt i,j,extmem,cum,maxsize,nee; 169 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 170 PetscInt *sfvleaves,*sfvroots; 171 PetscInt *corners,*cedges; 172 PetscInt *ecount,**eneighs,*vcount,**vneighs; 173 #if defined(PETSC_USE_DEBUG) 174 PetscInt *emarks; 175 #endif 176 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 177 PetscErrorCode ierr; 178 179 PetscFunctionBegin; 180 /* If the discrete gradient is defined for a subset of dofs and global is true, 181 it assumes G is given in global ordering for all the dofs. 182 Otherwise, the ordering is global for the Nedelec field */ 183 order = pcbddc->nedorder; 184 conforming = pcbddc->conforming; 185 field = pcbddc->nedfield; 186 global = pcbddc->nedglobal; 187 setprimal = PETSC_FALSE; 188 print = PETSC_FALSE; 189 singular = PETSC_FALSE; 190 191 /* Command line customization */ 192 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 193 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 194 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 195 ierr = PetscOptionsInt ("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 196 /* print debug info TODO: to be removed */ 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsEnd();CHKERRQ(ierr); 199 200 /* Return if there are no edges in the decomposition and the problem is not singular */ 201 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 202 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 203 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 204 if (!singular) { 205 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 206 lrc[0] = PETSC_FALSE; 207 for (i=0;i<n;i++) { 208 if (PetscRealPart(vals[i]) > 2.) { 209 lrc[0] = PETSC_TRUE; 210 break; 211 } 212 } 213 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 214 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 215 if (!lrc[1]) PetscFunctionReturn(0); 216 } 217 218 /* Get Nedelec field */ 219 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 220 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 221 if (pcbddc->n_ISForDofsLocal && field >= 0) { 222 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 223 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 224 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 225 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 226 ne = n; 227 nedfieldlocal = NULL; 228 global = PETSC_TRUE; 229 } else if (field == PETSC_DECIDE) { 230 PetscInt rst,ren,*idx; 231 232 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 233 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 234 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 235 for (i=rst;i<ren;i++) { 236 PetscInt nc; 237 238 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 239 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 240 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 241 } 242 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 243 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 244 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 245 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 246 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 247 } else { 248 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 249 } 250 251 /* Sanity checks */ 252 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 253 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 254 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 255 256 /* Just set primal dofs and return */ 257 if (setprimal) { 258 IS enedfieldlocal; 259 PetscInt *eidxs; 260 261 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 262 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 263 if (nedfieldlocal) { 264 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 265 for (i=0,cum=0;i<ne;i++) { 266 if (PetscRealPart(vals[idxs[i]]) > 2.) { 267 eidxs[cum++] = idxs[i]; 268 } 269 } 270 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 271 } else { 272 for (i=0,cum=0;i<ne;i++) { 273 if (PetscRealPart(vals[i]) > 2.) { 274 eidxs[cum++] = i; 275 } 276 } 277 } 278 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 279 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 280 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 281 ierr = PetscFree(eidxs);CHKERRQ(ierr); 282 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 283 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 /* Compute some l2g maps */ 288 if (nedfieldlocal) { 289 IS is; 290 291 /* need to map from the local Nedelec field to local numbering */ 292 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 293 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 294 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 295 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 296 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 297 if (global) { 298 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 299 el2g = al2g; 300 } else { 301 IS gis; 302 303 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 304 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 305 ierr = ISDestroy(&gis);CHKERRQ(ierr); 306 } 307 ierr = ISDestroy(&is);CHKERRQ(ierr); 308 } else { 309 /* restore default */ 310 pcbddc->nedfield = -1; 311 /* one ref for the destruction of al2g, one for el2g */ 312 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 313 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 314 el2g = al2g; 315 fl2g = NULL; 316 } 317 318 /* Start communication to drop connections for interior edges (for cc analysis only) */ 319 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 320 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 321 if (nedfieldlocal) { 322 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 323 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 324 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 325 } else { 326 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 327 } 328 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 329 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 330 331 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 332 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 333 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 334 if (global) { 335 PetscInt rst; 336 337 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 338 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 339 if (matis->sf_rootdata[i] < 2) { 340 matis->sf_rootdata[cum++] = i + rst; 341 } 342 } 343 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 344 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 345 } else { 346 PetscInt *tbz; 347 348 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 349 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 350 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 351 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 352 for (i=0,cum=0;i<ne;i++) 353 if (matis->sf_leafdata[idxs[i]] == 1) 354 tbz[cum++] = i; 355 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 356 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 357 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 358 ierr = PetscFree(tbz);CHKERRQ(ierr); 359 } 360 } else { /* we need the entire G to infer the nullspace */ 361 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 362 G = pcbddc->discretegradient; 363 } 364 365 /* Extract subdomain relevant rows of G */ 366 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 367 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 368 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 369 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 370 ierr = ISDestroy(&lned);CHKERRQ(ierr); 371 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 372 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 373 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 374 375 /* SF for nodal dofs communications */ 376 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 377 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 378 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 379 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 380 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 381 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 382 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 384 i = singular ? 2 : 1; 385 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 386 387 /* Destroy temporary G created in MATIS format and modified G */ 388 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 389 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 390 ierr = MatDestroy(&G);CHKERRQ(ierr); 391 392 if (print) { 393 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 394 ierr = MatView(lG,NULL);CHKERRQ(ierr); 395 } 396 397 /* Save lG for values insertion in change of basis */ 398 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 399 400 /* Analyze the edge-nodes connections (duplicate lG) */ 401 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 402 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 403 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 404 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 405 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 406 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 407 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 408 /* need to import the boundary specification to ensure the 409 proper detection of coarse edges' endpoints */ 410 if (pcbddc->DirichletBoundariesLocal) { 411 IS is; 412 413 if (fl2g) { 414 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 415 } else { 416 is = pcbddc->DirichletBoundariesLocal; 417 } 418 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 419 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 420 for (i=0;i<cum;i++) { 421 if (idxs[i] >= 0) { 422 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 423 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 424 } 425 } 426 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 427 if (fl2g) { 428 ierr = ISDestroy(&is);CHKERRQ(ierr); 429 } 430 } 431 if (pcbddc->NeumannBoundariesLocal) { 432 IS is; 433 434 if (fl2g) { 435 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 436 } else { 437 is = pcbddc->NeumannBoundariesLocal; 438 } 439 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 440 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 441 for (i=0;i<cum;i++) { 442 if (idxs[i] >= 0) { 443 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 444 } 445 } 446 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 447 if (fl2g) { 448 ierr = ISDestroy(&is);CHKERRQ(ierr); 449 } 450 } 451 452 /* Count neighs per dof */ 453 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 454 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 455 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 456 for (i=1,cum=0;i<n_neigh;i++) { 457 cum += n_shared[i]; 458 for (j=0;j<n_shared[i];j++) { 459 ecount[shared[i][j]]++; 460 } 461 } 462 if (ne) { 463 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 464 } 465 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 466 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 467 for (i=1;i<n_neigh;i++) { 468 for (j=0;j<n_shared[i];j++) { 469 PetscInt k = shared[i][j]; 470 eneighs[k][ecount[k]] = neigh[i]; 471 ecount[k]++; 472 } 473 } 474 for (i=0;i<ne;i++) { 475 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 476 } 477 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 478 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 479 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 480 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 481 for (i=1,cum=0;i<n_neigh;i++) { 482 cum += n_shared[i]; 483 for (j=0;j<n_shared[i];j++) { 484 vcount[shared[i][j]]++; 485 } 486 } 487 if (nv) { 488 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 489 } 490 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 491 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 492 for (i=1;i<n_neigh;i++) { 493 for (j=0;j<n_shared[i];j++) { 494 PetscInt k = shared[i][j]; 495 vneighs[k][vcount[k]] = neigh[i]; 496 vcount[k]++; 497 } 498 } 499 for (i=0;i<nv;i++) { 500 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 501 } 502 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 503 504 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 505 for proper detection of coarse edges' endpoints */ 506 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 507 for (i=0;i<ne;i++) { 508 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 509 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 510 } 511 } 512 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 513 if (!conforming) { 514 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 515 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 516 } 517 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 518 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 519 cum = 0; 520 for (i=0;i<ne;i++) { 521 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 522 if (!PetscBTLookup(btee,i)) { 523 marks[cum++] = i; 524 continue; 525 } 526 /* set badly connected edge dofs as primal */ 527 if (!conforming) { 528 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 529 marks[cum++] = i; 530 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 531 for (j=ii[i];j<ii[i+1];j++) { 532 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 533 } 534 } else { 535 /* every edge dofs should be connected trough a certain number of nodal dofs 536 to other edge dofs belonging to coarse edges 537 - at most 2 endpoints 538 - order-1 interior nodal dofs 539 - no undefined nodal dofs (nconn < order) 540 */ 541 PetscInt ends = 0,ints = 0, undef = 0; 542 for (j=ii[i];j<ii[i+1];j++) { 543 PetscInt v = jj[j],k; 544 PetscInt nconn = iit[v+1]-iit[v]; 545 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 546 if (nconn > order) ends++; 547 else if (nconn == order) ints++; 548 else undef++; 549 } 550 if (undef || ends > 2 || ints != order -1) { 551 marks[cum++] = i; 552 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 553 for (j=ii[i];j<ii[i+1];j++) { 554 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 555 } 556 } 557 } 558 } 559 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 560 if (!order && ii[i+1] != ii[i]) { 561 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 562 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 563 } 564 } 565 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 566 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 567 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 568 if (!conforming) { 569 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 570 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 571 } 572 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 573 574 /* identify splitpoints and corner candidates */ 575 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 576 if (print) { 577 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 578 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 579 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 580 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 581 } 582 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 583 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 584 for (i=0;i<nv;i++) { 585 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 586 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 587 if (!order) { /* variable order */ 588 PetscReal vorder = 0.; 589 590 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 591 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 592 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 593 ord = 1; 594 } 595 #if defined(PETSC_USE_DEBUG) 596 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 597 #endif 598 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 599 if (PetscBTLookup(btbd,jj[j])) { 600 bdir = PETSC_TRUE; 601 break; 602 } 603 if (vc != ecount[jj[j]]) { 604 sneighs = PETSC_FALSE; 605 } else { 606 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 607 for (k=0;k<vc;k++) { 608 if (vn[k] != en[k]) { 609 sneighs = PETSC_FALSE; 610 break; 611 } 612 } 613 } 614 } 615 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 616 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 617 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 618 } else if (test == ord) { 619 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 620 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 621 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 622 } else { 623 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 624 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 625 } 626 } 627 } 628 ierr = PetscFree(ecount);CHKERRQ(ierr); 629 ierr = PetscFree(vcount);CHKERRQ(ierr); 630 if (ne) { 631 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 632 } 633 if (nv) { 634 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 635 } 636 ierr = PetscFree(eneighs);CHKERRQ(ierr); 637 ierr = PetscFree(vneighs);CHKERRQ(ierr); 638 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 639 640 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 641 if (order != 1) { 642 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 643 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 644 for (i=0;i<nv;i++) { 645 if (PetscBTLookup(btvcand,i)) { 646 PetscBool found = PETSC_FALSE; 647 for (j=ii[i];j<ii[i+1] && !found;j++) { 648 PetscInt k,e = jj[j]; 649 if (PetscBTLookup(bte,e)) continue; 650 for (k=iit[e];k<iit[e+1];k++) { 651 PetscInt v = jjt[k]; 652 if (v != i && PetscBTLookup(btvcand,v)) { 653 found = PETSC_TRUE; 654 break; 655 } 656 } 657 } 658 if (!found) { 659 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 660 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 661 } else { 662 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 663 } 664 } 665 } 666 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 667 } 668 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 669 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 670 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 671 672 /* Get the local G^T explicitly */ 673 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 674 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 675 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 676 677 /* Mark interior nodal dofs */ 678 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 679 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 680 for (i=1;i<n_neigh;i++) { 681 for (j=0;j<n_shared[i];j++) { 682 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 683 } 684 } 685 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 686 687 /* communicate corners and splitpoints */ 688 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 689 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 690 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 691 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 692 693 if (print) { 694 IS tbz; 695 696 cum = 0; 697 for (i=0;i<nv;i++) 698 if (sfvleaves[i]) 699 vmarks[cum++] = i; 700 701 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 702 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 703 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 704 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 705 } 706 707 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 708 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 709 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 710 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 711 712 /* Zero rows of lGt corresponding to identified corners 713 and interior nodal dofs */ 714 cum = 0; 715 for (i=0;i<nv;i++) { 716 if (sfvleaves[i]) { 717 vmarks[cum++] = i; 718 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 719 } 720 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 721 } 722 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 723 if (print) { 724 IS tbz; 725 726 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 727 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 728 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 729 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 730 } 731 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 732 ierr = PetscFree(vmarks);CHKERRQ(ierr); 733 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 734 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 735 736 /* Recompute G */ 737 ierr = MatDestroy(&lG);CHKERRQ(ierr); 738 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 739 if (print) { 740 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 741 ierr = MatView(lG,NULL);CHKERRQ(ierr); 742 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 743 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 744 } 745 746 /* Get primal dofs (if any) */ 747 cum = 0; 748 for (i=0;i<ne;i++) { 749 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 750 } 751 if (fl2g) { 752 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 753 } 754 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 755 if (print) { 756 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 757 ierr = ISView(primals,NULL);CHKERRQ(ierr); 758 } 759 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 760 /* TODO: what if the user passed in some of them ? */ 761 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 762 ierr = ISDestroy(&primals);CHKERRQ(ierr); 763 764 /* Compute edge connectivity */ 765 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 766 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 767 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 768 if (fl2g) { 769 PetscBT btf; 770 PetscInt *iia,*jja,*iiu,*jju; 771 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 772 773 /* create CSR for all local dofs */ 774 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 775 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 776 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 777 iiu = pcbddc->mat_graph->xadj; 778 jju = pcbddc->mat_graph->adjncy; 779 } else if (pcbddc->use_local_adj) { 780 rest = PETSC_TRUE; 781 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 782 } else { 783 free = PETSC_TRUE; 784 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 785 iiu[0] = 0; 786 for (i=0;i<n;i++) { 787 iiu[i+1] = i+1; 788 jju[i] = -1; 789 } 790 } 791 792 /* import sizes of CSR */ 793 iia[0] = 0; 794 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 795 796 /* overwrite entries corresponding to the Nedelec field */ 797 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 798 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 799 for (i=0;i<ne;i++) { 800 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 801 iia[idxs[i]+1] = ii[i+1]-ii[i]; 802 } 803 804 /* iia in CSR */ 805 for (i=0;i<n;i++) iia[i+1] += iia[i]; 806 807 /* jja in CSR */ 808 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 809 for (i=0;i<n;i++) 810 if (!PetscBTLookup(btf,i)) 811 for (j=0;j<iiu[i+1]-iiu[i];j++) 812 jja[iia[i]+j] = jju[iiu[i]+j]; 813 814 /* map edge dofs connectivity */ 815 if (jj) { 816 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 817 for (i=0;i<ne;i++) { 818 PetscInt e = idxs[i]; 819 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 820 } 821 } 822 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 823 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 824 if (rest) { 825 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 826 } 827 if (free) { 828 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 829 } 830 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 831 } else { 832 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 833 } 834 835 /* Analyze interface for edge dofs */ 836 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 837 pcbddc->mat_graph->twodim = PETSC_FALSE; 838 839 /* Get coarse edges in the edge space */ 840 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 841 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 842 843 if (fl2g) { 844 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 845 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 846 for (i=0;i<nee;i++) { 847 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 848 } 849 } else { 850 eedges = alleedges; 851 primals = allprimals; 852 } 853 854 /* Mark fine edge dofs with their coarse edge id */ 855 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 856 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 857 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 858 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 859 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 860 if (print) { 861 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 862 ierr = ISView(primals,NULL);CHKERRQ(ierr); 863 } 864 865 maxsize = 0; 866 for (i=0;i<nee;i++) { 867 PetscInt size,mark = i+1; 868 869 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 870 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 871 for (j=0;j<size;j++) marks[idxs[j]] = mark; 872 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 873 maxsize = PetscMax(maxsize,size); 874 } 875 876 /* Find coarse edge endpoints */ 877 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 878 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 879 for (i=0;i<nee;i++) { 880 PetscInt mark = i+1,size; 881 882 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 883 if (!size && nedfieldlocal) continue; 884 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 885 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 886 if (print) { 887 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 888 ISView(eedges[i],NULL); 889 } 890 for (j=0;j<size;j++) { 891 PetscInt k, ee = idxs[j]; 892 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 893 for (k=ii[ee];k<ii[ee+1];k++) { 894 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 895 if (PetscBTLookup(btv,jj[k])) { 896 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 897 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 898 PetscInt k2; 899 PetscBool corner = PETSC_FALSE; 900 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 901 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 902 /* it's a corner if either is connected with an edge dof belonging to a different cc or 903 if the edge dof lie on the natural part of the boundary */ 904 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 905 corner = PETSC_TRUE; 906 break; 907 } 908 } 909 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 910 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 911 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 912 } else { 913 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 914 } 915 } 916 } 917 } 918 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 919 } 920 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 921 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 922 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 923 924 /* Reset marked primal dofs */ 925 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 926 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 927 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 928 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 929 930 /* Now use the initial lG */ 931 ierr = MatDestroy(&lG);CHKERRQ(ierr); 932 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 933 lG = lGinit; 934 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 935 936 /* Compute extended cols indices */ 937 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 938 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 939 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 940 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 941 i *= maxsize; 942 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 943 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 944 eerr = PETSC_FALSE; 945 for (i=0;i<nee;i++) { 946 PetscInt size,found = 0; 947 948 cum = 0; 949 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 950 if (!size && nedfieldlocal) continue; 951 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 952 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 953 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 954 for (j=0;j<size;j++) { 955 PetscInt k,ee = idxs[j]; 956 for (k=ii[ee];k<ii[ee+1];k++) { 957 PetscInt vv = jj[k]; 958 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 959 else if (!PetscBTLookupSet(btvc,vv)) found++; 960 } 961 } 962 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 963 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 964 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 965 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 966 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 967 /* it may happen that endpoints are not defined at this point 968 if it is the case, mark this edge for a second pass */ 969 if (cum != size -1 || found != 2) { 970 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 971 if (print) { 972 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 973 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 974 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 975 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 976 } 977 eerr = PETSC_TRUE; 978 } 979 } 980 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 981 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 982 if (done) { 983 PetscInt *newprimals; 984 985 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 986 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 987 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 988 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 989 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 990 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 991 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 992 for (i=0;i<nee;i++) { 993 PetscBool has_candidates = PETSC_FALSE; 994 if (PetscBTLookup(bter,i)) { 995 PetscInt size,mark = i+1; 996 997 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 998 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 999 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1000 for (j=0;j<size;j++) { 1001 PetscInt k,ee = idxs[j]; 1002 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1003 for (k=ii[ee];k<ii[ee+1];k++) { 1004 /* set all candidates located on the edge as corners */ 1005 if (PetscBTLookup(btvcand,jj[k])) { 1006 PetscInt k2,vv = jj[k]; 1007 has_candidates = PETSC_TRUE; 1008 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1009 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1010 /* set all edge dofs connected to candidate as primals */ 1011 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1012 if (marks[jjt[k2]] == mark) { 1013 PetscInt k3,ee2 = jjt[k2]; 1014 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1015 newprimals[cum++] = ee2; 1016 /* finally set the new corners */ 1017 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1018 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1019 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1020 } 1021 } 1022 } 1023 } else { 1024 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1025 } 1026 } 1027 } 1028 if (!has_candidates) { /* circular edge */ 1029 PetscInt k, ee = idxs[0],*tmarks; 1030 1031 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1032 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1033 for (k=ii[ee];k<ii[ee+1];k++) { 1034 PetscInt k2; 1035 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1036 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1037 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1038 } 1039 for (j=0;j<size;j++) { 1040 if (tmarks[idxs[j]] > 1) { 1041 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1042 newprimals[cum++] = idxs[j]; 1043 } 1044 } 1045 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1046 } 1047 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1048 } 1049 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1050 } 1051 ierr = PetscFree(extcols);CHKERRQ(ierr); 1052 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1053 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1054 if (fl2g) { 1055 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1056 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1057 for (i=0;i<nee;i++) { 1058 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1059 } 1060 ierr = PetscFree(eedges);CHKERRQ(ierr); 1061 } 1062 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1063 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1064 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1065 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1066 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1067 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1068 pcbddc->mat_graph->twodim = PETSC_FALSE; 1069 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1070 if (fl2g) { 1071 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1072 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1073 for (i=0;i<nee;i++) { 1074 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1075 } 1076 } else { 1077 eedges = alleedges; 1078 primals = allprimals; 1079 } 1080 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1081 1082 /* Mark again */ 1083 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1084 for (i=0;i<nee;i++) { 1085 PetscInt size,mark = i+1; 1086 1087 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1088 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1089 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1090 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1091 } 1092 if (print) { 1093 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1094 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1095 } 1096 1097 /* Recompute extended cols */ 1098 eerr = PETSC_FALSE; 1099 for (i=0;i<nee;i++) { 1100 PetscInt size; 1101 1102 cum = 0; 1103 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1104 if (!size && nedfieldlocal) continue; 1105 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1106 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1107 for (j=0;j<size;j++) { 1108 PetscInt k,ee = idxs[j]; 1109 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1110 } 1111 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1112 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1113 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1114 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1115 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1116 if (cum != size -1) { 1117 if (print) { 1118 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1119 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1120 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1121 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1122 } 1123 eerr = PETSC_TRUE; 1124 } 1125 } 1126 } 1127 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1128 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1129 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1130 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1131 /* an error should not occur at this point */ 1132 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1133 1134 /* Check the number of endpoints */ 1135 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1136 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1137 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1138 for (i=0;i<nee;i++) { 1139 PetscInt size, found = 0, gc[2]; 1140 1141 /* init with defaults */ 1142 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1143 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1144 if (!size && nedfieldlocal) continue; 1145 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1146 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1147 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1148 for (j=0;j<size;j++) { 1149 PetscInt k,ee = idxs[j]; 1150 for (k=ii[ee];k<ii[ee+1];k++) { 1151 PetscInt vv = jj[k]; 1152 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1153 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1154 corners[i*2+found++] = vv; 1155 } 1156 } 1157 } 1158 if (found != 2) { 1159 PetscInt e; 1160 if (fl2g) { 1161 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1162 } else { 1163 e = idxs[0]; 1164 } 1165 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1166 } 1167 1168 /* get primal dof index on this coarse edge */ 1169 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1170 if (gc[0] > gc[1]) { 1171 PetscInt swap = corners[2*i]; 1172 corners[2*i] = corners[2*i+1]; 1173 corners[2*i+1] = swap; 1174 } 1175 cedges[i] = idxs[size-1]; 1176 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1177 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1178 } 1179 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1180 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1181 1182 #if defined(PETSC_USE_DEBUG) 1183 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1184 not interfere with neighbouring coarse edges */ 1185 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1186 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1187 for (i=0;i<nv;i++) { 1188 PetscInt emax = 0,eemax = 0; 1189 1190 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1191 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1192 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1193 for (j=1;j<nee+1;j++) { 1194 if (emax < emarks[j]) { 1195 emax = emarks[j]; 1196 eemax = j; 1197 } 1198 } 1199 /* not relevant for edges */ 1200 if (!eemax) continue; 1201 1202 for (j=ii[i];j<ii[i+1];j++) { 1203 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1204 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1205 } 1206 } 1207 } 1208 ierr = PetscFree(emarks);CHKERRQ(ierr); 1209 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1210 #endif 1211 1212 /* Compute extended rows indices for edge blocks of the change of basis */ 1213 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1214 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1215 extmem *= maxsize; 1216 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1217 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1218 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1219 for (i=0;i<nv;i++) { 1220 PetscInt mark = 0,size,start; 1221 1222 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1223 for (j=ii[i];j<ii[i+1];j++) 1224 if (marks[jj[j]] && !mark) 1225 mark = marks[jj[j]]; 1226 1227 /* not relevant */ 1228 if (!mark) continue; 1229 1230 /* import extended row */ 1231 mark--; 1232 start = mark*extmem+extrowcum[mark]; 1233 size = ii[i+1]-ii[i]; 1234 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1235 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1236 extrowcum[mark] += size; 1237 } 1238 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1239 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1240 ierr = PetscFree(marks);CHKERRQ(ierr); 1241 1242 /* Compress extrows */ 1243 cum = 0; 1244 for (i=0;i<nee;i++) { 1245 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1246 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1247 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1248 cum = PetscMax(cum,size); 1249 } 1250 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1251 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1252 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1253 1254 /* Workspace for lapack inner calls and VecSetValues */ 1255 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1256 1257 /* Create change of basis matrix (preallocation can be improved) */ 1258 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1259 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1260 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1261 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1262 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1263 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1264 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1265 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1266 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1267 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1268 1269 /* Defaults to identity */ 1270 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1271 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1272 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1273 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1274 1275 /* Create discrete gradient for the coarser level if needed */ 1276 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1277 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1278 if (pcbddc->current_level < pcbddc->max_levels) { 1279 ISLocalToGlobalMapping cel2g,cvl2g; 1280 IS wis,gwis; 1281 PetscInt cnv,cne; 1282 1283 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1284 if (fl2g) { 1285 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1286 } else { 1287 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1288 pcbddc->nedclocal = wis; 1289 } 1290 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1291 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1292 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1293 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1294 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1295 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1296 1297 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1298 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1299 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1300 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1301 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1302 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1303 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1304 1305 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1306 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1307 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1308 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1309 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1310 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1311 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1312 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1313 } 1314 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1315 1316 #if defined(PRINT_GDET) 1317 inc = 0; 1318 lev = pcbddc->current_level; 1319 #endif 1320 1321 /* Insert values in the change of basis matrix */ 1322 for (i=0;i<nee;i++) { 1323 Mat Gins = NULL, GKins = NULL; 1324 IS cornersis = NULL; 1325 PetscScalar cvals[2]; 1326 1327 if (pcbddc->nedcG) { 1328 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1329 } 1330 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1331 if (Gins && GKins) { 1332 PetscScalar *data; 1333 const PetscInt *rows,*cols; 1334 PetscInt nrh,nch,nrc,ncc; 1335 1336 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1337 /* H1 */ 1338 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1339 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1340 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1341 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1342 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1343 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1344 /* complement */ 1345 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1346 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1347 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1348 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1349 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1350 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1351 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1352 1353 /* coarse discrete gradient */ 1354 if (pcbddc->nedcG) { 1355 PetscInt cols[2]; 1356 1357 cols[0] = 2*i; 1358 cols[1] = 2*i+1; 1359 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1360 } 1361 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1362 } 1363 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1364 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1365 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1366 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1367 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1368 } 1369 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1370 1371 /* Start assembling */ 1372 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1373 if (pcbddc->nedcG) { 1374 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1375 } 1376 1377 /* Free */ 1378 if (fl2g) { 1379 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1380 for (i=0;i<nee;i++) { 1381 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1382 } 1383 ierr = PetscFree(eedges);CHKERRQ(ierr); 1384 } 1385 1386 /* hack mat_graph with primal dofs on the coarse edges */ 1387 { 1388 PCBDDCGraph graph = pcbddc->mat_graph; 1389 PetscInt *oqueue = graph->queue; 1390 PetscInt *ocptr = graph->cptr; 1391 PetscInt ncc,*idxs; 1392 1393 /* find first primal edge */ 1394 if (pcbddc->nedclocal) { 1395 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1396 } else { 1397 if (fl2g) { 1398 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1399 } 1400 idxs = cedges; 1401 } 1402 cum = 0; 1403 while (cum < nee && cedges[cum] < 0) cum++; 1404 1405 /* adapt connected components */ 1406 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1407 graph->cptr[0] = 0; 1408 for (i=0,ncc=0;i<graph->ncc;i++) { 1409 PetscInt lc = ocptr[i+1]-ocptr[i]; 1410 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1411 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1412 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1413 ncc++; 1414 lc--; 1415 cum++; 1416 while (cum < nee && cedges[cum] < 0) cum++; 1417 } 1418 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1419 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1420 ncc++; 1421 } 1422 graph->ncc = ncc; 1423 if (pcbddc->nedclocal) { 1424 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1425 } 1426 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1427 } 1428 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1429 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1430 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1431 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1432 1433 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1434 ierr = PetscFree(extrow);CHKERRQ(ierr); 1435 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1436 ierr = PetscFree(corners);CHKERRQ(ierr); 1437 ierr = PetscFree(cedges);CHKERRQ(ierr); 1438 ierr = PetscFree(extrows);CHKERRQ(ierr); 1439 ierr = PetscFree(extcols);CHKERRQ(ierr); 1440 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1441 1442 /* Complete assembling */ 1443 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1444 if (pcbddc->nedcG) { 1445 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1446 #if 0 1447 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1448 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1449 #endif 1450 } 1451 1452 /* set change of basis */ 1453 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1454 ierr = MatDestroy(&T);CHKERRQ(ierr); 1455 1456 PetscFunctionReturn(0); 1457 } 1458 1459 /* the near-null space of BDDC carries information on quadrature weights, 1460 and these can be collinear -> so cheat with MatNullSpaceCreate 1461 and create a suitable set of basis vectors first */ 1462 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1463 { 1464 PetscErrorCode ierr; 1465 PetscInt i; 1466 1467 PetscFunctionBegin; 1468 for (i=0;i<nvecs;i++) { 1469 PetscInt first,last; 1470 1471 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1472 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1473 if (i>=first && i < last) { 1474 PetscScalar *data; 1475 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1476 if (!has_const) { 1477 data[i-first] = 1.; 1478 } else { 1479 data[2*i-first] = 1./PetscSqrtReal(2.); 1480 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1481 } 1482 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1483 } 1484 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1485 } 1486 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1487 for (i=0;i<nvecs;i++) { /* reset vectors */ 1488 PetscInt first,last; 1489 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1490 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1491 if (i>=first && i < last) { 1492 PetscScalar *data; 1493 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1494 if (!has_const) { 1495 data[i-first] = 0.; 1496 } else { 1497 data[2*i-first] = 0.; 1498 data[2*i-first+1] = 0.; 1499 } 1500 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1501 } 1502 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1503 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1504 } 1505 PetscFunctionReturn(0); 1506 } 1507 1508 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1509 { 1510 Mat loc_divudotp; 1511 Vec p,v,vins,quad_vec,*quad_vecs; 1512 ISLocalToGlobalMapping map; 1513 IS *faces,*edges; 1514 PetscScalar *vals; 1515 const PetscScalar *array; 1516 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1517 PetscMPIInt rank; 1518 PetscErrorCode ierr; 1519 1520 PetscFunctionBegin; 1521 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1522 if (graph->twodim) { 1523 lmaxneighs = 2; 1524 } else { 1525 lmaxneighs = 1; 1526 for (i=0;i<ne;i++) { 1527 const PetscInt *idxs; 1528 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1529 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1530 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1531 } 1532 lmaxneighs++; /* graph count does not include self */ 1533 } 1534 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1535 maxsize = 0; 1536 for (i=0;i<ne;i++) { 1537 PetscInt nn; 1538 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1539 maxsize = PetscMax(maxsize,nn); 1540 } 1541 for (i=0;i<nf;i++) { 1542 PetscInt nn; 1543 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1544 maxsize = PetscMax(maxsize,nn); 1545 } 1546 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1547 /* create vectors to hold quadrature weights */ 1548 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1549 if (!transpose) { 1550 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1551 } else { 1552 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1553 } 1554 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1555 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1556 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1557 for (i=0;i<maxneighs;i++) { 1558 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1559 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1560 } 1561 1562 /* compute local quad vec */ 1563 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1564 if (!transpose) { 1565 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1566 } else { 1567 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1568 } 1569 ierr = VecSet(p,1.);CHKERRQ(ierr); 1570 if (!transpose) { 1571 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1572 } else { 1573 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1574 } 1575 if (vl2l) { 1576 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1577 } else { 1578 vins = v; 1579 } 1580 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1581 ierr = VecDestroy(&p);CHKERRQ(ierr); 1582 1583 /* insert in global quadrature vecs */ 1584 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1585 for (i=0;i<nf;i++) { 1586 const PetscInt *idxs; 1587 PetscInt idx,nn,j; 1588 1589 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1590 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1591 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1592 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1593 idx = -(idx+1); 1594 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1595 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1596 } 1597 for (i=0;i<ne;i++) { 1598 const PetscInt *idxs; 1599 PetscInt idx,nn,j; 1600 1601 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1602 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1603 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1604 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1605 idx = -(idx+1); 1606 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1607 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1608 } 1609 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1610 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1611 if (vl2l) { 1612 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1613 } 1614 ierr = VecDestroy(&v);CHKERRQ(ierr); 1615 ierr = PetscFree(vals);CHKERRQ(ierr); 1616 1617 /* assemble near null space */ 1618 for (i=0;i<maxneighs;i++) { 1619 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1620 } 1621 for (i=0;i<maxneighs;i++) { 1622 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1623 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1624 } 1625 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1626 PetscFunctionReturn(0); 1627 } 1628 1629 1630 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1631 { 1632 PetscErrorCode ierr; 1633 Vec local,global; 1634 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1635 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1636 1637 PetscFunctionBegin; 1638 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1639 /* need to convert from global to local topology information and remove references to information in global ordering */ 1640 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1641 if (pcbddc->user_provided_isfordofs) { 1642 if (pcbddc->n_ISForDofs) { 1643 PetscInt i; 1644 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1645 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1646 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1647 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1648 } 1649 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1650 pcbddc->n_ISForDofs = 0; 1651 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1652 } 1653 } else { 1654 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1655 PetscInt i, n = matis->A->rmap->n; 1656 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1657 if (i > 1) { 1658 pcbddc->n_ISForDofsLocal = i; 1659 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1660 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1661 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1662 } 1663 } 1664 } else { 1665 PetscInt i; 1666 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1667 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1668 } 1669 } 1670 } 1671 1672 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1673 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1674 } else if (pcbddc->DirichletBoundariesLocal) { 1675 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1676 } 1677 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1678 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1679 } else if (pcbddc->NeumannBoundariesLocal) { 1680 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1681 } 1682 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1683 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1684 } 1685 ierr = VecDestroy(&global);CHKERRQ(ierr); 1686 ierr = VecDestroy(&local);CHKERRQ(ierr); 1687 1688 PetscFunctionReturn(0); 1689 } 1690 1691 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1692 { 1693 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1694 PetscErrorCode ierr; 1695 IS nis; 1696 const PetscInt *idxs; 1697 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1698 PetscBool *ld; 1699 1700 PetscFunctionBegin; 1701 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1702 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1703 if (mop == MPI_LAND) { 1704 /* init rootdata with true */ 1705 ld = (PetscBool*) matis->sf_rootdata; 1706 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1707 } else { 1708 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1709 } 1710 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1711 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1712 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1713 ld = (PetscBool*) matis->sf_leafdata; 1714 for (i=0;i<nd;i++) 1715 if (-1 < idxs[i] && idxs[i] < n) 1716 ld[idxs[i]] = PETSC_TRUE; 1717 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1718 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1719 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1720 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1721 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1722 if (mop == MPI_LAND) { 1723 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1724 } else { 1725 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1726 } 1727 for (i=0,nnd=0;i<n;i++) 1728 if (ld[i]) 1729 nidxs[nnd++] = i; 1730 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1731 ierr = ISDestroy(is);CHKERRQ(ierr); 1732 *is = nis; 1733 PetscFunctionReturn(0); 1734 } 1735 1736 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1737 { 1738 PC_IS *pcis = (PC_IS*)(pc->data); 1739 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1740 PetscErrorCode ierr; 1741 1742 PetscFunctionBegin; 1743 if (!pcbddc->benign_have_null) { 1744 PetscFunctionReturn(0); 1745 } 1746 if (pcbddc->ChangeOfBasisMatrix) { 1747 Vec swap; 1748 1749 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1750 swap = pcbddc->work_change; 1751 pcbddc->work_change = r; 1752 r = swap; 1753 } 1754 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1755 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1756 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1757 ierr = VecSet(z,0.);CHKERRQ(ierr); 1758 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1759 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1760 if (pcbddc->ChangeOfBasisMatrix) { 1761 pcbddc->work_change = r; 1762 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1763 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1764 } 1765 PetscFunctionReturn(0); 1766 } 1767 1768 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1769 { 1770 PCBDDCBenignMatMult_ctx ctx; 1771 PetscErrorCode ierr; 1772 PetscBool apply_right,apply_left,reset_x; 1773 1774 PetscFunctionBegin; 1775 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1776 if (transpose) { 1777 apply_right = ctx->apply_left; 1778 apply_left = ctx->apply_right; 1779 } else { 1780 apply_right = ctx->apply_right; 1781 apply_left = ctx->apply_left; 1782 } 1783 reset_x = PETSC_FALSE; 1784 if (apply_right) { 1785 const PetscScalar *ax; 1786 PetscInt nl,i; 1787 1788 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1789 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1790 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1791 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1792 for (i=0;i<ctx->benign_n;i++) { 1793 PetscScalar sum,val; 1794 const PetscInt *idxs; 1795 PetscInt nz,j; 1796 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1797 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1798 sum = 0.; 1799 if (ctx->apply_p0) { 1800 val = ctx->work[idxs[nz-1]]; 1801 for (j=0;j<nz-1;j++) { 1802 sum += ctx->work[idxs[j]]; 1803 ctx->work[idxs[j]] += val; 1804 } 1805 } else { 1806 for (j=0;j<nz-1;j++) { 1807 sum += ctx->work[idxs[j]]; 1808 } 1809 } 1810 ctx->work[idxs[nz-1]] -= sum; 1811 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1812 } 1813 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1814 reset_x = PETSC_TRUE; 1815 } 1816 if (transpose) { 1817 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1818 } else { 1819 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1820 } 1821 if (reset_x) { 1822 ierr = VecResetArray(x);CHKERRQ(ierr); 1823 } 1824 if (apply_left) { 1825 PetscScalar *ay; 1826 PetscInt i; 1827 1828 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1829 for (i=0;i<ctx->benign_n;i++) { 1830 PetscScalar sum,val; 1831 const PetscInt *idxs; 1832 PetscInt nz,j; 1833 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1834 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1835 val = -ay[idxs[nz-1]]; 1836 if (ctx->apply_p0) { 1837 sum = 0.; 1838 for (j=0;j<nz-1;j++) { 1839 sum += ay[idxs[j]]; 1840 ay[idxs[j]] += val; 1841 } 1842 ay[idxs[nz-1]] += sum; 1843 } else { 1844 for (j=0;j<nz-1;j++) { 1845 ay[idxs[j]] += val; 1846 } 1847 ay[idxs[nz-1]] = 0.; 1848 } 1849 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1850 } 1851 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1852 } 1853 PetscFunctionReturn(0); 1854 } 1855 1856 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1857 { 1858 PetscErrorCode ierr; 1859 1860 PetscFunctionBegin; 1861 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1862 PetscFunctionReturn(0); 1863 } 1864 1865 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1866 { 1867 PetscErrorCode ierr; 1868 1869 PetscFunctionBegin; 1870 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1871 PetscFunctionReturn(0); 1872 } 1873 1874 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1875 { 1876 PC_IS *pcis = (PC_IS*)pc->data; 1877 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1878 PCBDDCBenignMatMult_ctx ctx; 1879 PetscErrorCode ierr; 1880 1881 PetscFunctionBegin; 1882 if (!restore) { 1883 Mat A_IB,A_BI; 1884 PetscScalar *work; 1885 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1886 1887 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1888 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1889 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1890 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1891 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1892 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1893 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1894 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1895 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1896 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1897 ctx->apply_left = PETSC_TRUE; 1898 ctx->apply_right = PETSC_FALSE; 1899 ctx->apply_p0 = PETSC_FALSE; 1900 ctx->benign_n = pcbddc->benign_n; 1901 if (reuse) { 1902 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1903 ctx->free = PETSC_FALSE; 1904 } else { /* TODO: could be optimized for successive solves */ 1905 ISLocalToGlobalMapping N_to_D; 1906 PetscInt i; 1907 1908 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1909 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1910 for (i=0;i<pcbddc->benign_n;i++) { 1911 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1912 } 1913 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1914 ctx->free = PETSC_TRUE; 1915 } 1916 ctx->A = pcis->A_IB; 1917 ctx->work = work; 1918 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1919 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1920 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1921 pcis->A_IB = A_IB; 1922 1923 /* A_BI as A_IB^T */ 1924 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1925 pcbddc->benign_original_mat = pcis->A_BI; 1926 pcis->A_BI = A_BI; 1927 } else { 1928 if (!pcbddc->benign_original_mat) { 1929 PetscFunctionReturn(0); 1930 } 1931 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1932 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1933 pcis->A_IB = ctx->A; 1934 ctx->A = NULL; 1935 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1936 pcis->A_BI = pcbddc->benign_original_mat; 1937 pcbddc->benign_original_mat = NULL; 1938 if (ctx->free) { 1939 PetscInt i; 1940 for (i=0;i<ctx->benign_n;i++) { 1941 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1942 } 1943 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1944 } 1945 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1946 ierr = PetscFree(ctx);CHKERRQ(ierr); 1947 } 1948 PetscFunctionReturn(0); 1949 } 1950 1951 /* used just in bddc debug mode */ 1952 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1953 { 1954 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1955 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1956 Mat An; 1957 PetscErrorCode ierr; 1958 1959 PetscFunctionBegin; 1960 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1961 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1962 if (is1) { 1963 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1964 ierr = MatDestroy(&An);CHKERRQ(ierr); 1965 } else { 1966 *B = An; 1967 } 1968 PetscFunctionReturn(0); 1969 } 1970 1971 /* TODO: add reuse flag */ 1972 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1973 { 1974 Mat Bt; 1975 PetscScalar *a,*bdata; 1976 const PetscInt *ii,*ij; 1977 PetscInt m,n,i,nnz,*bii,*bij; 1978 PetscBool flg_row; 1979 PetscErrorCode ierr; 1980 1981 PetscFunctionBegin; 1982 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1983 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1984 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1985 nnz = n; 1986 for (i=0;i<ii[n];i++) { 1987 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1988 } 1989 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1990 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1991 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1992 nnz = 0; 1993 bii[0] = 0; 1994 for (i=0;i<n;i++) { 1995 PetscInt j; 1996 for (j=ii[i];j<ii[i+1];j++) { 1997 PetscScalar entry = a[j]; 1998 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1999 bij[nnz] = ij[j]; 2000 bdata[nnz] = entry; 2001 nnz++; 2002 } 2003 } 2004 bii[i+1] = nnz; 2005 } 2006 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2007 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2008 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2009 { 2010 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2011 b->free_a = PETSC_TRUE; 2012 b->free_ij = PETSC_TRUE; 2013 } 2014 *B = Bt; 2015 PetscFunctionReturn(0); 2016 } 2017 2018 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 2019 { 2020 Mat B; 2021 IS is_dummy,*cc_n; 2022 ISLocalToGlobalMapping l2gmap_dummy; 2023 PCBDDCGraph graph; 2024 PetscInt i,n; 2025 PetscInt *xadj,*adjncy; 2026 PetscInt *xadj_filtered,*adjncy_filtered; 2027 PetscBool flg_row,isseqaij; 2028 PetscErrorCode ierr; 2029 2030 PetscFunctionBegin; 2031 if (!A->rmap->N || !A->cmap->N) { 2032 *ncc = 0; 2033 *cc = NULL; 2034 PetscFunctionReturn(0); 2035 } 2036 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2037 if (!isseqaij && filter) { 2038 PetscBool isseqdense; 2039 2040 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2041 if (!isseqdense) { 2042 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2043 } else { /* TODO: rectangular case and LDA */ 2044 PetscScalar *array; 2045 PetscReal chop=1.e-6; 2046 2047 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2048 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2049 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2050 for (i=0;i<n;i++) { 2051 PetscInt j; 2052 for (j=i+1;j<n;j++) { 2053 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2054 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2055 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2056 } 2057 } 2058 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2059 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2060 } 2061 } else { 2062 B = A; 2063 } 2064 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2065 2066 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2067 if (filter) { 2068 PetscScalar *data; 2069 PetscInt j,cum; 2070 2071 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2072 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2073 cum = 0; 2074 for (i=0;i<n;i++) { 2075 PetscInt t; 2076 2077 for (j=xadj[i];j<xadj[i+1];j++) { 2078 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2079 continue; 2080 } 2081 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2082 } 2083 t = xadj_filtered[i]; 2084 xadj_filtered[i] = cum; 2085 cum += t; 2086 } 2087 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2088 } else { 2089 xadj_filtered = NULL; 2090 adjncy_filtered = NULL; 2091 } 2092 2093 /* compute local connected components using PCBDDCGraph */ 2094 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2095 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2096 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2097 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2098 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2099 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2100 if (xadj_filtered) { 2101 graph->xadj = xadj_filtered; 2102 graph->adjncy = adjncy_filtered; 2103 } else { 2104 graph->xadj = xadj; 2105 graph->adjncy = adjncy; 2106 } 2107 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2108 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2109 /* partial clean up */ 2110 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2111 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2112 if (A != B) { 2113 ierr = MatDestroy(&B);CHKERRQ(ierr); 2114 } 2115 2116 /* get back data */ 2117 if (ncc) *ncc = graph->ncc; 2118 if (cc) { 2119 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2120 for (i=0;i<graph->ncc;i++) { 2121 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2122 } 2123 *cc = cc_n; 2124 } 2125 /* clean up graph */ 2126 graph->xadj = 0; 2127 graph->adjncy = 0; 2128 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2129 PetscFunctionReturn(0); 2130 } 2131 2132 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2133 { 2134 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2135 PC_IS* pcis = (PC_IS*)(pc->data); 2136 IS dirIS = NULL; 2137 PetscInt i; 2138 PetscErrorCode ierr; 2139 2140 PetscFunctionBegin; 2141 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2142 if (zerodiag) { 2143 Mat A; 2144 Vec vec3_N; 2145 PetscScalar *vals; 2146 const PetscInt *idxs; 2147 PetscInt nz,*count; 2148 2149 /* p0 */ 2150 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2151 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2152 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2153 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2154 for (i=0;i<nz;i++) vals[i] = 1.; 2155 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2156 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2157 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2158 /* v_I */ 2159 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2160 for (i=0;i<nz;i++) vals[i] = 0.; 2161 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2162 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2163 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2164 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2165 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2166 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2167 if (dirIS) { 2168 PetscInt n; 2169 2170 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2171 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2172 for (i=0;i<n;i++) vals[i] = 0.; 2173 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2174 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2175 } 2176 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2177 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2178 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2179 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2180 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2181 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2182 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2183 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2184 ierr = PetscFree(vals);CHKERRQ(ierr); 2185 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2186 2187 /* there should not be any pressure dofs lying on the interface */ 2188 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2189 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2190 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2191 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2192 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2193 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2194 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2195 ierr = PetscFree(count);CHKERRQ(ierr); 2196 } 2197 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2198 2199 /* check PCBDDCBenignGetOrSetP0 */ 2200 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2201 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2202 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2203 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2204 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2205 for (i=0;i<pcbddc->benign_n;i++) { 2206 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2207 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 2208 } 2209 PetscFunctionReturn(0); 2210 } 2211 2212 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2213 { 2214 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2215 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2216 PetscInt nz,n; 2217 PetscInt *interior_dofs,n_interior_dofs,nneu; 2218 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2223 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2224 for (n=0;n<pcbddc->benign_n;n++) { 2225 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2226 } 2227 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2228 pcbddc->benign_n = 0; 2229 2230 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2231 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2232 Checks if all the pressure dofs in each subdomain have a zero diagonal 2233 If not, a change of basis on pressures is not needed 2234 since the local Schur complements are already SPD 2235 */ 2236 has_null_pressures = PETSC_TRUE; 2237 have_null = PETSC_TRUE; 2238 if (pcbddc->n_ISForDofsLocal) { 2239 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2240 2241 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2242 ierr = PetscOptionsInt ("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2243 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2244 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2245 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2246 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2247 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2248 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2249 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2250 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2251 if (!sorted) { 2252 ierr = ISSort(pressures);CHKERRQ(ierr); 2253 } 2254 } else { 2255 pressures = NULL; 2256 } 2257 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2258 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2259 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2260 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2261 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2262 if (!sorted) { 2263 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2264 } 2265 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2266 zerodiag_save = zerodiag; 2267 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2268 if (!nz) { 2269 if (n) have_null = PETSC_FALSE; 2270 has_null_pressures = PETSC_FALSE; 2271 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2272 } 2273 recompute_zerodiag = PETSC_FALSE; 2274 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2275 zerodiag_subs = NULL; 2276 pcbddc->benign_n = 0; 2277 n_interior_dofs = 0; 2278 interior_dofs = NULL; 2279 nneu = 0; 2280 if (pcbddc->NeumannBoundariesLocal) { 2281 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2282 } 2283 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2284 if (checkb) { /* need to compute interior nodes */ 2285 PetscInt n,i,j; 2286 PetscInt n_neigh,*neigh,*n_shared,**shared; 2287 PetscInt *iwork; 2288 2289 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2290 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2291 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2292 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2293 for (i=1;i<n_neigh;i++) 2294 for (j=0;j<n_shared[i];j++) 2295 iwork[shared[i][j]] += 1; 2296 for (i=0;i<n;i++) 2297 if (!iwork[i]) 2298 interior_dofs[n_interior_dofs++] = i; 2299 ierr = PetscFree(iwork);CHKERRQ(ierr); 2300 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2301 } 2302 if (has_null_pressures) { 2303 IS *subs; 2304 PetscInt nsubs,i,j,nl; 2305 const PetscInt *idxs; 2306 PetscScalar *array; 2307 Vec *work; 2308 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2309 2310 subs = pcbddc->local_subs; 2311 nsubs = pcbddc->n_local_subs; 2312 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2313 if (checkb) { 2314 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2315 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2316 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2317 /* work[0] = 1_p */ 2318 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2319 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2320 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2321 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2322 /* work[0] = 1_v */ 2323 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2324 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2325 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2326 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2327 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2328 } 2329 if (nsubs > 1) { 2330 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2331 for (i=0;i<nsubs;i++) { 2332 ISLocalToGlobalMapping l2g; 2333 IS t_zerodiag_subs; 2334 PetscInt nl; 2335 2336 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2337 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2338 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2339 if (nl) { 2340 PetscBool valid = PETSC_TRUE; 2341 2342 if (checkb) { 2343 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2344 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2345 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2346 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2347 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2348 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2349 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2350 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2351 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2352 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2353 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2354 for (j=0;j<n_interior_dofs;j++) { 2355 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2356 valid = PETSC_FALSE; 2357 break; 2358 } 2359 } 2360 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2361 } 2362 if (valid && nneu) { 2363 const PetscInt *idxs; 2364 PetscInt nzb; 2365 2366 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2367 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2368 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2369 if (nzb) valid = PETSC_FALSE; 2370 } 2371 if (valid && pressures) { 2372 IS t_pressure_subs; 2373 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2374 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2375 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2376 } 2377 if (valid) { 2378 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2379 pcbddc->benign_n++; 2380 } else { 2381 recompute_zerodiag = PETSC_TRUE; 2382 } 2383 } 2384 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2385 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2386 } 2387 } else { /* there's just one subdomain (or zero if they have not been detected */ 2388 PetscBool valid = PETSC_TRUE; 2389 2390 if (nneu) valid = PETSC_FALSE; 2391 if (valid && pressures) { 2392 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2393 } 2394 if (valid && checkb) { 2395 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2396 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2397 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2398 for (j=0;j<n_interior_dofs;j++) { 2399 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2400 valid = PETSC_FALSE; 2401 break; 2402 } 2403 } 2404 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2405 } 2406 if (valid) { 2407 pcbddc->benign_n = 1; 2408 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2409 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2410 zerodiag_subs[0] = zerodiag; 2411 } 2412 } 2413 if (checkb) { 2414 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2415 } 2416 } 2417 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2418 2419 if (!pcbddc->benign_n) { 2420 PetscInt n; 2421 2422 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2423 recompute_zerodiag = PETSC_FALSE; 2424 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2425 if (n) { 2426 has_null_pressures = PETSC_FALSE; 2427 have_null = PETSC_FALSE; 2428 } 2429 } 2430 2431 /* final check for null pressures */ 2432 if (zerodiag && pressures) { 2433 PetscInt nz,np; 2434 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2435 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2436 if (nz != np) have_null = PETSC_FALSE; 2437 } 2438 2439 if (recompute_zerodiag) { 2440 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2441 if (pcbddc->benign_n == 1) { 2442 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2443 zerodiag = zerodiag_subs[0]; 2444 } else { 2445 PetscInt i,nzn,*new_idxs; 2446 2447 nzn = 0; 2448 for (i=0;i<pcbddc->benign_n;i++) { 2449 PetscInt ns; 2450 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2451 nzn += ns; 2452 } 2453 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2454 nzn = 0; 2455 for (i=0;i<pcbddc->benign_n;i++) { 2456 PetscInt ns,*idxs; 2457 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2458 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2459 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2460 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2461 nzn += ns; 2462 } 2463 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2464 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2465 } 2466 have_null = PETSC_FALSE; 2467 } 2468 2469 /* Prepare matrix to compute no-net-flux */ 2470 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2471 Mat A,loc_divudotp; 2472 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2473 IS row,col,isused = NULL; 2474 PetscInt M,N,n,st,n_isused; 2475 2476 if (pressures) { 2477 isused = pressures; 2478 } else { 2479 isused = zerodiag_save; 2480 } 2481 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2482 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2483 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2484 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2485 n_isused = 0; 2486 if (isused) { 2487 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2488 } 2489 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2490 st = st-n_isused; 2491 if (n) { 2492 const PetscInt *gidxs; 2493 2494 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2495 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2496 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2497 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2498 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2499 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2500 } else { 2501 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2502 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2503 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2504 } 2505 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2506 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2507 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2508 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2509 ierr = ISDestroy(&row);CHKERRQ(ierr); 2510 ierr = ISDestroy(&col);CHKERRQ(ierr); 2511 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2512 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2513 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2514 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2515 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2516 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2517 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2518 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2519 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2520 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2521 } 2522 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2523 2524 /* change of basis and p0 dofs */ 2525 if (has_null_pressures) { 2526 IS zerodiagc; 2527 const PetscInt *idxs,*idxsc; 2528 PetscInt i,s,*nnz; 2529 2530 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2531 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2532 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2533 /* local change of basis for pressures */ 2534 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2535 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2536 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2537 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2538 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2539 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2540 for (i=0;i<pcbddc->benign_n;i++) { 2541 PetscInt nzs,j; 2542 2543 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2544 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2545 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2546 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2547 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2548 } 2549 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2550 ierr = PetscFree(nnz);CHKERRQ(ierr); 2551 /* set identity on velocities */ 2552 for (i=0;i<n-nz;i++) { 2553 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2554 } 2555 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2556 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2557 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2558 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2559 /* set change on pressures */ 2560 for (s=0;s<pcbddc->benign_n;s++) { 2561 PetscScalar *array; 2562 PetscInt nzs; 2563 2564 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2565 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2566 for (i=0;i<nzs-1;i++) { 2567 PetscScalar vals[2]; 2568 PetscInt cols[2]; 2569 2570 cols[0] = idxs[i]; 2571 cols[1] = idxs[nzs-1]; 2572 vals[0] = 1.; 2573 vals[1] = 1.; 2574 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2575 } 2576 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2577 for (i=0;i<nzs-1;i++) array[i] = -1.; 2578 array[nzs-1] = 1.; 2579 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2580 /* store local idxs for p0 */ 2581 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2582 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2583 ierr = PetscFree(array);CHKERRQ(ierr); 2584 } 2585 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2586 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2587 /* project if needed */ 2588 if (pcbddc->benign_change_explicit) { 2589 Mat M; 2590 2591 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2592 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2593 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2594 ierr = MatDestroy(&M);CHKERRQ(ierr); 2595 } 2596 /* store global idxs for p0 */ 2597 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2598 } 2599 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2600 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2601 2602 /* determines if the coarse solver will be singular or not */ 2603 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2604 /* determines if the problem has subdomains with 0 pressure block */ 2605 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2606 *zerodiaglocal = zerodiag; 2607 PetscFunctionReturn(0); 2608 } 2609 2610 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2611 { 2612 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2613 PetscScalar *array; 2614 PetscErrorCode ierr; 2615 2616 PetscFunctionBegin; 2617 if (!pcbddc->benign_sf) { 2618 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2619 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2620 } 2621 if (get) { 2622 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2623 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2624 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2625 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2626 } else { 2627 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2628 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2629 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2630 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2631 } 2632 PetscFunctionReturn(0); 2633 } 2634 2635 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2636 { 2637 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2638 PetscErrorCode ierr; 2639 2640 PetscFunctionBegin; 2641 /* TODO: add error checking 2642 - avoid nested pop (or push) calls. 2643 - cannot push before pop. 2644 - cannot call this if pcbddc->local_mat is NULL 2645 */ 2646 if (!pcbddc->benign_n) { 2647 PetscFunctionReturn(0); 2648 } 2649 if (pop) { 2650 if (pcbddc->benign_change_explicit) { 2651 IS is_p0; 2652 MatReuse reuse; 2653 2654 /* extract B_0 */ 2655 reuse = MAT_INITIAL_MATRIX; 2656 if (pcbddc->benign_B0) { 2657 reuse = MAT_REUSE_MATRIX; 2658 } 2659 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2660 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2661 /* remove rows and cols from local problem */ 2662 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2663 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2664 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2665 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2666 } else { 2667 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2668 PetscScalar *vals; 2669 PetscInt i,n,*idxs_ins; 2670 2671 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2672 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2673 if (!pcbddc->benign_B0) { 2674 PetscInt *nnz; 2675 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2676 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2677 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2678 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2679 for (i=0;i<pcbddc->benign_n;i++) { 2680 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2681 nnz[i] = n - nnz[i]; 2682 } 2683 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2684 ierr = PetscFree(nnz);CHKERRQ(ierr); 2685 } 2686 2687 for (i=0;i<pcbddc->benign_n;i++) { 2688 PetscScalar *array; 2689 PetscInt *idxs,j,nz,cum; 2690 2691 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2692 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2693 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2694 for (j=0;j<nz;j++) vals[j] = 1.; 2695 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2696 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2697 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2698 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2699 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2700 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2701 cum = 0; 2702 for (j=0;j<n;j++) { 2703 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2704 vals[cum] = array[j]; 2705 idxs_ins[cum] = j; 2706 cum++; 2707 } 2708 } 2709 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2710 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2711 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2712 } 2713 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2714 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2715 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2716 } 2717 } else { /* push */ 2718 if (pcbddc->benign_change_explicit) { 2719 PetscInt i; 2720 2721 for (i=0;i<pcbddc->benign_n;i++) { 2722 PetscScalar *B0_vals; 2723 PetscInt *B0_cols,B0_ncol; 2724 2725 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2726 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2727 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2728 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2729 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2730 } 2731 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2732 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2733 } else { 2734 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2735 } 2736 } 2737 PetscFunctionReturn(0); 2738 } 2739 2740 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2741 { 2742 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2743 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2744 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2745 PetscBLASInt *B_iwork,*B_ifail; 2746 PetscScalar *work,lwork; 2747 PetscScalar *St,*S,*eigv; 2748 PetscScalar *Sarray,*Starray; 2749 PetscReal *eigs,thresh; 2750 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2751 PetscBool allocated_S_St; 2752 #if defined(PETSC_USE_COMPLEX) 2753 PetscReal *rwork; 2754 #endif 2755 PetscErrorCode ierr; 2756 2757 PetscFunctionBegin; 2758 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2759 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2760 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\nRerun with -sub_schurs_hermitian 1 -sub_schurs_posdef 1 if the problem is SPD",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2761 2762 if (pcbddc->dbg_flag) { 2763 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2764 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2765 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2766 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2767 } 2768 2769 if (pcbddc->dbg_flag) { 2770 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2771 } 2772 2773 /* max size of subsets */ 2774 mss = 0; 2775 for (i=0;i<sub_schurs->n_subs;i++) { 2776 PetscInt subset_size; 2777 2778 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2779 mss = PetscMax(mss,subset_size); 2780 } 2781 2782 /* min/max and threshold */ 2783 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2784 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2785 nmax = PetscMax(nmin,nmax); 2786 allocated_S_St = PETSC_FALSE; 2787 if (nmin) { 2788 allocated_S_St = PETSC_TRUE; 2789 } 2790 2791 /* allocate lapack workspace */ 2792 cum = cum2 = 0; 2793 maxneigs = 0; 2794 for (i=0;i<sub_schurs->n_subs;i++) { 2795 PetscInt n,subset_size; 2796 2797 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2798 n = PetscMin(subset_size,nmax); 2799 cum += subset_size; 2800 cum2 += subset_size*n; 2801 maxneigs = PetscMax(maxneigs,n); 2802 } 2803 if (mss) { 2804 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2805 PetscBLASInt B_itype = 1; 2806 PetscBLASInt B_N = mss; 2807 PetscReal zero = 0.0; 2808 PetscReal eps = 0.0; /* dlamch? */ 2809 2810 B_lwork = -1; 2811 S = NULL; 2812 St = NULL; 2813 eigs = NULL; 2814 eigv = NULL; 2815 B_iwork = NULL; 2816 B_ifail = NULL; 2817 #if defined(PETSC_USE_COMPLEX) 2818 rwork = NULL; 2819 #endif 2820 thresh = 1.0; 2821 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2822 #if defined(PETSC_USE_COMPLEX) 2823 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2824 #else 2825 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2826 #endif 2827 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2828 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2829 } else { 2830 /* TODO */ 2831 } 2832 } else { 2833 lwork = 0; 2834 } 2835 2836 nv = 0; 2837 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2838 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2839 } 2840 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2841 if (allocated_S_St) { 2842 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2843 } 2844 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2845 #if defined(PETSC_USE_COMPLEX) 2846 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2847 #endif 2848 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2849 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2850 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2851 nv+cum,&pcbddc->adaptive_constraints_idxs, 2852 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2853 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2854 2855 maxneigs = 0; 2856 cum = cumarray = 0; 2857 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2858 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2859 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2860 const PetscInt *idxs; 2861 2862 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2863 for (cum=0;cum<nv;cum++) { 2864 pcbddc->adaptive_constraints_n[cum] = 1; 2865 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2866 pcbddc->adaptive_constraints_data[cum] = 1.0; 2867 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2868 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2869 } 2870 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2871 } 2872 2873 if (mss) { /* multilevel */ 2874 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2875 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2876 } 2877 2878 thresh = pcbddc->adaptive_threshold; 2879 for (i=0;i<sub_schurs->n_subs;i++) { 2880 const PetscInt *idxs; 2881 PetscReal upper,lower; 2882 PetscInt j,subset_size,eigs_start = 0; 2883 PetscBLASInt B_N; 2884 PetscBool same_data = PETSC_FALSE; 2885 2886 if (pcbddc->use_deluxe_scaling) { 2887 upper = PETSC_MAX_REAL; 2888 lower = thresh; 2889 } else { 2890 upper = 1./thresh; 2891 lower = 0.; 2892 } 2893 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2894 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2895 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2896 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2897 if (sub_schurs->is_hermitian) { 2898 PetscInt j,k; 2899 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2900 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2901 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2902 } 2903 for (j=0;j<subset_size;j++) { 2904 for (k=j;k<subset_size;k++) { 2905 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2906 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2907 } 2908 } 2909 } else { 2910 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2911 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2912 } 2913 } else { 2914 S = Sarray + cumarray; 2915 St = Starray + cumarray; 2916 } 2917 /* see if we can save some work */ 2918 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2919 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2920 } 2921 2922 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2923 B_neigs = 0; 2924 } else { 2925 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2926 PetscBLASInt B_itype = 1; 2927 PetscBLASInt B_IL, B_IU; 2928 PetscReal eps = -1.0; /* dlamch? */ 2929 PetscInt nmin_s; 2930 PetscBool compute_range = PETSC_FALSE; 2931 2932 if (pcbddc->dbg_flag) { 2933 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 2934 } 2935 2936 compute_range = PETSC_FALSE; 2937 if (thresh > 1.+PETSC_SMALL && !same_data) { 2938 compute_range = PETSC_TRUE; 2939 } 2940 2941 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2942 if (compute_range) { 2943 2944 /* ask for eigenvalues larger than thresh */ 2945 #if defined(PETSC_USE_COMPLEX) 2946 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2947 #else 2948 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2949 #endif 2950 } else if (!same_data) { 2951 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2952 B_IL = 1; 2953 #if defined(PETSC_USE_COMPLEX) 2954 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2955 #else 2956 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2957 #endif 2958 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 2959 PetscInt k; 2960 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2961 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2962 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2963 nmin = nmax; 2964 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2965 for (k=0;k<nmax;k++) { 2966 eigs[k] = 1./PETSC_SMALL; 2967 eigv[k*(subset_size+1)] = 1.0; 2968 } 2969 } 2970 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2971 if (B_ierr) { 2972 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2973 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2974 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2975 } 2976 2977 if (B_neigs > nmax) { 2978 if (pcbddc->dbg_flag) { 2979 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2980 } 2981 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2982 B_neigs = nmax; 2983 } 2984 2985 nmin_s = PetscMin(nmin,B_N); 2986 if (B_neigs < nmin_s) { 2987 PetscBLASInt B_neigs2; 2988 2989 if (pcbddc->use_deluxe_scaling) { 2990 B_IL = B_N - nmin_s + 1; 2991 B_IU = B_N - B_neigs; 2992 } else { 2993 B_IL = B_neigs + 1; 2994 B_IU = nmin_s; 2995 } 2996 if (pcbddc->dbg_flag) { 2997 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2998 } 2999 if (sub_schurs->is_hermitian) { 3000 PetscInt j,k; 3001 for (j=0;j<subset_size;j++) { 3002 for (k=j;k<subset_size;k++) { 3003 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3004 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3005 } 3006 } 3007 } else { 3008 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3009 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3010 } 3011 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3012 #if defined(PETSC_USE_COMPLEX) 3013 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3014 #else 3015 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3016 #endif 3017 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3018 B_neigs += B_neigs2; 3019 } 3020 if (B_ierr) { 3021 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3022 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3023 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3024 } 3025 if (pcbddc->dbg_flag) { 3026 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3027 for (j=0;j<B_neigs;j++) { 3028 if (eigs[j] == 0.0) { 3029 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3030 } else { 3031 if (pcbddc->use_deluxe_scaling) { 3032 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3033 } else { 3034 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3035 } 3036 } 3037 } 3038 } 3039 } else { 3040 /* TODO */ 3041 } 3042 } 3043 /* change the basis back to the original one */ 3044 if (sub_schurs->change) { 3045 Mat change,phi,phit; 3046 3047 if (pcbddc->dbg_flag > 1) { 3048 PetscInt ii; 3049 for (ii=0;ii<B_neigs;ii++) { 3050 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3051 for (j=0;j<B_N;j++) { 3052 #if defined(PETSC_USE_COMPLEX) 3053 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3054 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3055 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3056 #else 3057 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3058 #endif 3059 } 3060 } 3061 } 3062 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3063 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3064 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3065 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3066 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3067 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3068 } 3069 maxneigs = PetscMax(B_neigs,maxneigs); 3070 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3071 if (B_neigs) { 3072 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3073 3074 if (pcbddc->dbg_flag > 1) { 3075 PetscInt ii; 3076 for (ii=0;ii<B_neigs;ii++) { 3077 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3078 for (j=0;j<B_N;j++) { 3079 #if defined(PETSC_USE_COMPLEX) 3080 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3081 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3082 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3083 #else 3084 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3085 #endif 3086 } 3087 } 3088 } 3089 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3090 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3091 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3092 cum++; 3093 } 3094 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3095 /* shift for next computation */ 3096 cumarray += subset_size*subset_size; 3097 } 3098 if (pcbddc->dbg_flag) { 3099 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3100 } 3101 3102 if (mss) { 3103 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3104 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3105 /* destroy matrices (junk) */ 3106 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3107 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3108 } 3109 if (allocated_S_St) { 3110 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3111 } 3112 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3113 #if defined(PETSC_USE_COMPLEX) 3114 ierr = PetscFree(rwork);CHKERRQ(ierr); 3115 #endif 3116 if (pcbddc->dbg_flag) { 3117 PetscInt maxneigs_r; 3118 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3119 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3120 } 3121 PetscFunctionReturn(0); 3122 } 3123 3124 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3125 { 3126 PetscScalar *coarse_submat_vals; 3127 PetscErrorCode ierr; 3128 3129 PetscFunctionBegin; 3130 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3131 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3132 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3133 3134 /* Setup local neumann solver ksp_R */ 3135 /* PCBDDCSetUpLocalScatters should be called first! */ 3136 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3137 3138 /* 3139 Setup local correction and local part of coarse basis. 3140 Gives back the dense local part of the coarse matrix in column major ordering 3141 */ 3142 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3143 3144 /* Compute total number of coarse nodes and setup coarse solver */ 3145 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3146 3147 /* free */ 3148 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3149 PetscFunctionReturn(0); 3150 } 3151 3152 PetscErrorCode PCBDDCResetCustomization(PC pc) 3153 { 3154 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3155 PetscErrorCode ierr; 3156 3157 PetscFunctionBegin; 3158 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3159 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3160 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3161 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3162 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3163 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3164 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3165 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3166 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3167 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3168 PetscFunctionReturn(0); 3169 } 3170 3171 PetscErrorCode PCBDDCResetTopography(PC pc) 3172 { 3173 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3174 PetscInt i; 3175 PetscErrorCode ierr; 3176 3177 PetscFunctionBegin; 3178 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3179 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3180 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3181 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3182 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3183 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3184 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3185 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3186 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3187 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3188 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3189 for (i=0;i<pcbddc->n_local_subs;i++) { 3190 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3191 } 3192 pcbddc->n_local_subs = 0; 3193 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3194 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3195 pcbddc->graphanalyzed = PETSC_FALSE; 3196 pcbddc->recompute_topography = PETSC_TRUE; 3197 PetscFunctionReturn(0); 3198 } 3199 3200 PetscErrorCode PCBDDCResetSolvers(PC pc) 3201 { 3202 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3203 PetscErrorCode ierr; 3204 3205 PetscFunctionBegin; 3206 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3207 if (pcbddc->coarse_phi_B) { 3208 PetscScalar *array; 3209 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3210 ierr = PetscFree(array);CHKERRQ(ierr); 3211 } 3212 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3213 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3214 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3215 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3216 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3217 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3218 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3219 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3220 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3221 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3222 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3223 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3224 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3225 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3226 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3227 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3228 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3229 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3230 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3231 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3232 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3233 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3234 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3235 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3236 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3237 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3238 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3239 if (pcbddc->benign_zerodiag_subs) { 3240 PetscInt i; 3241 for (i=0;i<pcbddc->benign_n;i++) { 3242 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3243 } 3244 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3245 } 3246 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3247 PetscFunctionReturn(0); 3248 } 3249 3250 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3251 { 3252 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3253 PC_IS *pcis = (PC_IS*)pc->data; 3254 VecType impVecType; 3255 PetscInt n_constraints,n_R,old_size; 3256 PetscErrorCode ierr; 3257 3258 PetscFunctionBegin; 3259 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3260 n_R = pcis->n - pcbddc->n_vertices; 3261 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3262 /* local work vectors (try to avoid unneeded work)*/ 3263 /* R nodes */ 3264 old_size = -1; 3265 if (pcbddc->vec1_R) { 3266 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3267 } 3268 if (n_R != old_size) { 3269 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3270 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3271 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3272 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3273 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3274 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3275 } 3276 /* local primal dofs */ 3277 old_size = -1; 3278 if (pcbddc->vec1_P) { 3279 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3280 } 3281 if (pcbddc->local_primal_size != old_size) { 3282 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3283 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3284 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3285 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3286 } 3287 /* local explicit constraints */ 3288 old_size = -1; 3289 if (pcbddc->vec1_C) { 3290 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3291 } 3292 if (n_constraints && n_constraints != old_size) { 3293 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3294 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3295 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3296 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3297 } 3298 PetscFunctionReturn(0); 3299 } 3300 3301 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3302 { 3303 PetscErrorCode ierr; 3304 /* pointers to pcis and pcbddc */ 3305 PC_IS* pcis = (PC_IS*)pc->data; 3306 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3307 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3308 /* submatrices of local problem */ 3309 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3310 /* submatrices of local coarse problem */ 3311 Mat S_VV,S_CV,S_VC,S_CC; 3312 /* working matrices */ 3313 Mat C_CR; 3314 /* additional working stuff */ 3315 PC pc_R; 3316 Mat F; 3317 Vec dummy_vec; 3318 PetscBool isLU,isCHOL,isILU,need_benign_correction; 3319 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3320 PetscScalar *work; 3321 PetscInt *idx_V_B; 3322 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3323 PetscInt i,n_R,n_D,n_B; 3324 3325 /* some shortcuts to scalars */ 3326 PetscScalar one=1.0,m_one=-1.0; 3327 3328 PetscFunctionBegin; 3329 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3330 3331 /* Set Non-overlapping dimensions */ 3332 n_vertices = pcbddc->n_vertices; 3333 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3334 n_B = pcis->n_B; 3335 n_D = pcis->n - n_B; 3336 n_R = pcis->n - n_vertices; 3337 3338 /* vertices in boundary numbering */ 3339 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3340 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3341 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3342 3343 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3344 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3345 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3346 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3347 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3348 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3349 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3350 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3351 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3352 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3353 3354 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3355 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3356 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3357 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3358 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3359 lda_rhs = n_R; 3360 need_benign_correction = PETSC_FALSE; 3361 if (isLU || isILU || isCHOL) { 3362 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3363 } else if (sub_schurs && sub_schurs->reuse_solver) { 3364 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3365 MatFactorType type; 3366 3367 F = reuse_solver->F; 3368 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3369 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3370 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3371 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3372 } else { 3373 F = NULL; 3374 } 3375 3376 /* allocate workspace */ 3377 n = 0; 3378 if (n_constraints) { 3379 n += lda_rhs*n_constraints; 3380 } 3381 if (n_vertices) { 3382 n = PetscMax(2*lda_rhs*n_vertices,n); 3383 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3384 } 3385 if (!pcbddc->symmetric_primal) { 3386 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3387 } 3388 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3389 3390 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3391 dummy_vec = NULL; 3392 if (need_benign_correction && lda_rhs != n_R && F) { 3393 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3394 } 3395 3396 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3397 if (n_constraints) { 3398 Mat M1,M2,M3,C_B; 3399 IS is_aux; 3400 PetscScalar *array,*array2; 3401 3402 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3403 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3404 3405 /* Extract constraints on R nodes: C_{CR} */ 3406 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3407 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3408 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3409 3410 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3411 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3412 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3413 for (i=0;i<n_constraints;i++) { 3414 const PetscScalar *row_cmat_values; 3415 const PetscInt *row_cmat_indices; 3416 PetscInt size_of_constraint,j; 3417 3418 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3419 for (j=0;j<size_of_constraint;j++) { 3420 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3421 } 3422 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3423 } 3424 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3425 if (F) { 3426 Mat B; 3427 3428 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3429 if (need_benign_correction) { 3430 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3431 3432 /* rhs is already zero on interior dofs, no need to change the rhs */ 3433 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3434 } 3435 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3436 if (need_benign_correction) { 3437 PetscScalar *marr; 3438 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3439 3440 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3441 if (lda_rhs != n_R) { 3442 for (i=0;i<n_constraints;i++) { 3443 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3444 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3445 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3446 } 3447 } else { 3448 for (i=0;i<n_constraints;i++) { 3449 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3450 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3451 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3452 } 3453 } 3454 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3455 } 3456 ierr = MatDestroy(&B);CHKERRQ(ierr); 3457 } else { 3458 PetscScalar *marr; 3459 3460 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3461 for (i=0;i<n_constraints;i++) { 3462 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3463 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3464 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3465 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3466 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3467 } 3468 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3469 } 3470 if (!pcbddc->switch_static) { 3471 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3472 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3473 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3474 for (i=0;i<n_constraints;i++) { 3475 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3476 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3477 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3478 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3479 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3480 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3481 } 3482 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3483 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3484 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3485 } else { 3486 if (lda_rhs != n_R) { 3487 IS dummy; 3488 3489 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3490 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3491 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3492 } else { 3493 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3494 pcbddc->local_auxmat2 = local_auxmat2_R; 3495 } 3496 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3497 } 3498 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3499 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3500 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3501 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3502 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3503 if (isCHOL) { 3504 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3505 } else { 3506 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3507 } 3508 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3509 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3510 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3511 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3512 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3513 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3514 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3515 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3516 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3517 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3518 } 3519 3520 /* Get submatrices from subdomain matrix */ 3521 if (n_vertices) { 3522 IS is_aux; 3523 3524 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3525 IS tis; 3526 3527 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3528 ierr = ISSort(tis);CHKERRQ(ierr); 3529 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3530 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3531 } else { 3532 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3533 } 3534 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3535 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3536 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3537 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3538 } 3539 3540 /* Matrix of coarse basis functions (local) */ 3541 if (pcbddc->coarse_phi_B) { 3542 PetscInt on_B,on_primal,on_D=n_D; 3543 if (pcbddc->coarse_phi_D) { 3544 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3545 } 3546 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3547 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3548 PetscScalar *marray; 3549 3550 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3551 ierr = PetscFree(marray);CHKERRQ(ierr); 3552 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3553 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3554 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3555 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3556 } 3557 } 3558 3559 if (!pcbddc->coarse_phi_B) { 3560 PetscScalar *marr; 3561 3562 /* memory size */ 3563 n = n_B*pcbddc->local_primal_size; 3564 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 3565 if (!pcbddc->symmetric_primal) n *= 2; 3566 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 3567 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3568 marr += n_B*pcbddc->local_primal_size; 3569 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3570 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3571 marr += n_D*pcbddc->local_primal_size; 3572 } 3573 if (!pcbddc->symmetric_primal) { 3574 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3575 marr += n_B*pcbddc->local_primal_size; 3576 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3577 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3578 } 3579 } else { 3580 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3581 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3582 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3583 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3584 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3585 } 3586 } 3587 } 3588 3589 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3590 p0_lidx_I = NULL; 3591 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3592 const PetscInt *idxs; 3593 3594 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3595 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3596 for (i=0;i<pcbddc->benign_n;i++) { 3597 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3598 } 3599 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3600 } 3601 3602 /* vertices */ 3603 if (n_vertices) { 3604 3605 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3606 3607 if (n_R) { 3608 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3609 PetscBLASInt B_N,B_one = 1; 3610 PetscScalar *x,*y; 3611 PetscBool isseqaij; 3612 3613 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3614 if (need_benign_correction) { 3615 ISLocalToGlobalMapping RtoN; 3616 IS is_p0; 3617 PetscInt *idxs_p0,n; 3618 3619 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3620 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3621 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3622 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3623 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3624 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3625 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3626 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3627 } 3628 3629 if (lda_rhs == n_R) { 3630 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3631 } else { 3632 PetscScalar *av,*array; 3633 const PetscInt *xadj,*adjncy; 3634 PetscInt n; 3635 PetscBool flg_row; 3636 3637 array = work+lda_rhs*n_vertices; 3638 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3639 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3640 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3641 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3642 for (i=0;i<n;i++) { 3643 PetscInt j; 3644 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3645 } 3646 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3647 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3648 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3649 } 3650 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3651 if (need_benign_correction) { 3652 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3653 PetscScalar *marr; 3654 3655 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3656 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3657 3658 | 0 0 0 | (V) 3659 L = | 0 0 -1 | (P-p0) 3660 | 0 0 -1 | (p0) 3661 3662 */ 3663 for (i=0;i<reuse_solver->benign_n;i++) { 3664 const PetscScalar *vals; 3665 const PetscInt *idxs,*idxs_zero; 3666 PetscInt n,j,nz; 3667 3668 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3669 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3670 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3671 for (j=0;j<n;j++) { 3672 PetscScalar val = vals[j]; 3673 PetscInt k,col = idxs[j]; 3674 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3675 } 3676 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3677 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3678 } 3679 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3680 } 3681 if (F) { 3682 /* need to correct the rhs */ 3683 if (need_benign_correction) { 3684 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3685 PetscScalar *marr; 3686 3687 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3688 if (lda_rhs != n_R) { 3689 for (i=0;i<n_vertices;i++) { 3690 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3691 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3692 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3693 } 3694 } else { 3695 for (i=0;i<n_vertices;i++) { 3696 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3697 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3698 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3699 } 3700 } 3701 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3702 } 3703 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3704 /* need to correct the solution */ 3705 if (need_benign_correction) { 3706 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3707 PetscScalar *marr; 3708 3709 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3710 if (lda_rhs != n_R) { 3711 for (i=0;i<n_vertices;i++) { 3712 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3713 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3714 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3715 } 3716 } else { 3717 for (i=0;i<n_vertices;i++) { 3718 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3719 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3720 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3721 } 3722 } 3723 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3724 } 3725 } else { 3726 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3727 for (i=0;i<n_vertices;i++) { 3728 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3729 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3730 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3731 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3732 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3733 } 3734 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3735 } 3736 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3737 /* S_VV and S_CV */ 3738 if (n_constraints) { 3739 Mat B; 3740 3741 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3742 for (i=0;i<n_vertices;i++) { 3743 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3744 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3745 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3746 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3747 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3748 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3749 } 3750 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3751 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3752 ierr = MatDestroy(&B);CHKERRQ(ierr); 3753 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3754 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3755 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3756 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3757 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3758 ierr = MatDestroy(&B);CHKERRQ(ierr); 3759 } 3760 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3761 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3762 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3763 } 3764 if (lda_rhs != n_R) { 3765 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3766 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3767 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3768 } 3769 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3770 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3771 if (need_benign_correction) { 3772 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3773 PetscScalar *marr,*sums; 3774 3775 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3776 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3777 for (i=0;i<reuse_solver->benign_n;i++) { 3778 const PetscScalar *vals; 3779 const PetscInt *idxs,*idxs_zero; 3780 PetscInt n,j,nz; 3781 3782 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3783 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3784 for (j=0;j<n_vertices;j++) { 3785 PetscInt k; 3786 sums[j] = 0.; 3787 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3788 } 3789 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3790 for (j=0;j<n;j++) { 3791 PetscScalar val = vals[j]; 3792 PetscInt k; 3793 for (k=0;k<n_vertices;k++) { 3794 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3795 } 3796 } 3797 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3798 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3799 } 3800 ierr = PetscFree(sums);CHKERRQ(ierr); 3801 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3802 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3803 } 3804 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3805 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3806 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3807 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3808 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3809 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3810 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3811 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3812 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3813 } else { 3814 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3815 } 3816 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3817 3818 /* coarse basis functions */ 3819 for (i=0;i<n_vertices;i++) { 3820 PetscScalar *y; 3821 3822 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3823 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3824 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3825 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3826 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3827 y[n_B*i+idx_V_B[i]] = 1.0; 3828 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3829 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3830 3831 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3832 PetscInt j; 3833 3834 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3835 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3836 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3837 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3838 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3839 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3840 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3841 } 3842 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3843 } 3844 /* if n_R == 0 the object is not destroyed */ 3845 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3846 } 3847 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3848 3849 if (n_constraints) { 3850 Mat B; 3851 3852 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3853 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3854 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3855 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3856 if (n_vertices) { 3857 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3858 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3859 } else { 3860 Mat S_VCt; 3861 3862 if (lda_rhs != n_R) { 3863 ierr = MatDestroy(&B);CHKERRQ(ierr); 3864 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3865 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3866 } 3867 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3868 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3869 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3870 } 3871 } 3872 ierr = MatDestroy(&B);CHKERRQ(ierr); 3873 /* coarse basis functions */ 3874 for (i=0;i<n_constraints;i++) { 3875 PetscScalar *y; 3876 3877 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3878 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3879 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3880 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3881 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3882 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3883 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3884 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3885 PetscInt j; 3886 3887 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3888 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3889 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3890 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3891 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3892 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3893 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3894 } 3895 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3896 } 3897 } 3898 if (n_constraints) { 3899 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3900 } 3901 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3902 3903 /* coarse matrix entries relative to B_0 */ 3904 if (pcbddc->benign_n) { 3905 Mat B0_B,B0_BPHI; 3906 IS is_dummy; 3907 PetscScalar *data; 3908 PetscInt j; 3909 3910 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3911 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3912 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3913 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3914 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3915 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3916 for (j=0;j<pcbddc->benign_n;j++) { 3917 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3918 for (i=0;i<pcbddc->local_primal_size;i++) { 3919 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3920 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3921 } 3922 } 3923 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3924 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3925 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3926 } 3927 3928 /* compute other basis functions for non-symmetric problems */ 3929 if (!pcbddc->symmetric_primal) { 3930 Mat B_V=NULL,B_C=NULL; 3931 PetscScalar *marray; 3932 3933 if (n_constraints) { 3934 Mat S_CCT,C_CRT; 3935 3936 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 3937 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3938 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3939 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3940 if (n_vertices) { 3941 Mat S_VCT; 3942 3943 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3944 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3945 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3946 } 3947 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3948 } else { 3949 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3950 } 3951 if (n_vertices && n_R) { 3952 PetscScalar *av,*marray; 3953 const PetscInt *xadj,*adjncy; 3954 PetscInt n; 3955 PetscBool flg_row; 3956 3957 /* B_V = B_V - A_VR^T */ 3958 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3959 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3960 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3961 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3962 for (i=0;i<n;i++) { 3963 PetscInt j; 3964 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3965 } 3966 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3967 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3968 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3969 } 3970 3971 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3972 if (n_vertices) { 3973 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3974 for (i=0;i<n_vertices;i++) { 3975 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3976 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3977 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3978 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3979 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3980 } 3981 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3982 } 3983 if (B_C) { 3984 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3985 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3986 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3987 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3988 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3989 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3990 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3991 } 3992 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3993 } 3994 /* coarse basis functions */ 3995 for (i=0;i<pcbddc->local_primal_size;i++) { 3996 PetscScalar *y; 3997 3998 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3999 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4000 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4001 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4002 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4003 if (i<n_vertices) { 4004 y[n_B*i+idx_V_B[i]] = 1.0; 4005 } 4006 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4007 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4008 4009 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4010 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4011 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4012 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4013 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4014 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4015 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4016 } 4017 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4018 } 4019 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4020 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4021 } 4022 4023 /* free memory */ 4024 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4025 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4026 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4027 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4028 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4029 ierr = PetscFree(work);CHKERRQ(ierr); 4030 if (n_vertices) { 4031 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4032 } 4033 if (n_constraints) { 4034 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4035 } 4036 /* Checking coarse_sub_mat and coarse basis functios */ 4037 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4038 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4039 if (pcbddc->dbg_flag) { 4040 Mat coarse_sub_mat; 4041 Mat AUXMAT,TM1,TM2,TM3,TM4; 4042 Mat coarse_phi_D,coarse_phi_B; 4043 Mat coarse_psi_D,coarse_psi_B; 4044 Mat A_II,A_BB,A_IB,A_BI; 4045 Mat C_B,CPHI; 4046 IS is_dummy; 4047 Vec mones; 4048 MatType checkmattype=MATSEQAIJ; 4049 PetscReal real_value; 4050 4051 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4052 Mat A; 4053 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4054 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4055 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4056 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4057 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4058 ierr = MatDestroy(&A);CHKERRQ(ierr); 4059 } else { 4060 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4061 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4062 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4063 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4064 } 4065 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4066 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4067 if (!pcbddc->symmetric_primal) { 4068 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4069 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4070 } 4071 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4072 4073 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4074 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4075 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4076 if (!pcbddc->symmetric_primal) { 4077 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4078 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4079 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4080 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4081 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4082 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4083 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4084 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4085 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4086 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4087 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4088 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4089 } else { 4090 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4091 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4092 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4093 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4094 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4095 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4096 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4097 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4098 } 4099 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4100 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4101 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4102 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4103 if (pcbddc->benign_n) { 4104 Mat B0_B,B0_BPHI; 4105 PetscScalar *data,*data2; 4106 PetscInt j; 4107 4108 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4109 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4110 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4111 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4112 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4113 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4114 for (j=0;j<pcbddc->benign_n;j++) { 4115 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4116 for (i=0;i<pcbddc->local_primal_size;i++) { 4117 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4118 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4119 } 4120 } 4121 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4122 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4123 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4124 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4125 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4126 } 4127 #if 0 4128 { 4129 PetscViewer viewer; 4130 char filename[256]; 4131 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4132 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4133 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4134 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4135 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4136 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4137 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4138 if (save_change) { 4139 Mat phi_B; 4140 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 4141 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 4142 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 4143 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 4144 } else { 4145 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4146 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4147 } 4148 if (pcbddc->coarse_phi_D) { 4149 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4150 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4151 } 4152 if (pcbddc->coarse_psi_B) { 4153 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4154 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4155 } 4156 if (pcbddc->coarse_psi_D) { 4157 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4158 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4159 } 4160 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4161 } 4162 #endif 4163 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4164 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4165 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4166 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4167 4168 /* check constraints */ 4169 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4170 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4171 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4172 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4173 } else { 4174 PetscScalar *data; 4175 Mat tmat; 4176 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4177 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4178 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4179 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4180 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4181 } 4182 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4183 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4184 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4185 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4186 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4187 if (!pcbddc->symmetric_primal) { 4188 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4189 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4190 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4191 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4192 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4193 } 4194 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4195 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4196 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4197 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4198 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4199 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4200 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4201 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4202 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4203 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4204 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4205 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4206 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4207 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4208 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4209 if (!pcbddc->symmetric_primal) { 4210 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4211 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4212 } 4213 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4214 } 4215 /* get back data */ 4216 *coarse_submat_vals_n = coarse_submat_vals; 4217 PetscFunctionReturn(0); 4218 } 4219 4220 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4221 { 4222 Mat *work_mat; 4223 IS isrow_s,iscol_s; 4224 PetscBool rsorted,csorted; 4225 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4226 PetscErrorCode ierr; 4227 4228 PetscFunctionBegin; 4229 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4230 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4231 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4232 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4233 4234 if (!rsorted) { 4235 const PetscInt *idxs; 4236 PetscInt *idxs_sorted,i; 4237 4238 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4239 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4240 for (i=0;i<rsize;i++) { 4241 idxs_perm_r[i] = i; 4242 } 4243 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4244 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4245 for (i=0;i<rsize;i++) { 4246 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4247 } 4248 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4249 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4250 } else { 4251 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4252 isrow_s = isrow; 4253 } 4254 4255 if (!csorted) { 4256 if (isrow == iscol) { 4257 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4258 iscol_s = isrow_s; 4259 } else { 4260 const PetscInt *idxs; 4261 PetscInt *idxs_sorted,i; 4262 4263 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4264 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4265 for (i=0;i<csize;i++) { 4266 idxs_perm_c[i] = i; 4267 } 4268 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4269 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4270 for (i=0;i<csize;i++) { 4271 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4272 } 4273 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4274 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4275 } 4276 } else { 4277 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4278 iscol_s = iscol; 4279 } 4280 4281 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4282 4283 if (!rsorted || !csorted) { 4284 Mat new_mat; 4285 IS is_perm_r,is_perm_c; 4286 4287 if (!rsorted) { 4288 PetscInt *idxs_r,i; 4289 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4290 for (i=0;i<rsize;i++) { 4291 idxs_r[idxs_perm_r[i]] = i; 4292 } 4293 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4294 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4295 } else { 4296 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4297 } 4298 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4299 4300 if (!csorted) { 4301 if (isrow_s == iscol_s) { 4302 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4303 is_perm_c = is_perm_r; 4304 } else { 4305 PetscInt *idxs_c,i; 4306 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4307 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4308 for (i=0;i<csize;i++) { 4309 idxs_c[idxs_perm_c[i]] = i; 4310 } 4311 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4312 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4313 } 4314 } else { 4315 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4316 } 4317 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4318 4319 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4320 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4321 work_mat[0] = new_mat; 4322 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4323 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4324 } 4325 4326 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4327 *B = work_mat[0]; 4328 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4329 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4330 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4331 PetscFunctionReturn(0); 4332 } 4333 4334 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4335 { 4336 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4337 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4338 Mat new_mat,lA; 4339 IS is_local,is_global; 4340 PetscInt local_size; 4341 PetscBool isseqaij; 4342 PetscErrorCode ierr; 4343 4344 PetscFunctionBegin; 4345 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4346 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4347 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4348 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4349 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4350 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4351 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4352 4353 /* check */ 4354 if (pcbddc->dbg_flag) { 4355 Vec x,x_change; 4356 PetscReal error; 4357 4358 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4359 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4360 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4361 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4362 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4363 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4364 if (!pcbddc->change_interior) { 4365 const PetscScalar *x,*y,*v; 4366 PetscReal lerror = 0.; 4367 PetscInt i; 4368 4369 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4370 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4371 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4372 for (i=0;i<local_size;i++) 4373 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4374 lerror = PetscAbsScalar(x[i]-y[i]); 4375 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4376 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4377 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4378 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4379 if (error > PETSC_SMALL) { 4380 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4381 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4382 } else { 4383 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4384 } 4385 } 4386 } 4387 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4388 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4389 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4390 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4391 if (error > PETSC_SMALL) { 4392 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4393 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4394 } else { 4395 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4396 } 4397 } 4398 ierr = VecDestroy(&x);CHKERRQ(ierr); 4399 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4400 } 4401 4402 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4403 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4404 4405 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4406 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4407 if (isseqaij) { 4408 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4409 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4410 if (lA) { 4411 Mat work; 4412 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4413 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4414 ierr = MatDestroy(&work);CHKERRQ(ierr); 4415 } 4416 } else { 4417 Mat work_mat; 4418 4419 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4420 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4421 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4422 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4423 if (lA) { 4424 Mat work; 4425 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4426 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4427 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4428 ierr = MatDestroy(&work);CHKERRQ(ierr); 4429 } 4430 } 4431 if (matis->A->symmetric_set) { 4432 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4433 #if !defined(PETSC_USE_COMPLEX) 4434 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4435 #endif 4436 } 4437 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4438 PetscFunctionReturn(0); 4439 } 4440 4441 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4442 { 4443 PC_IS* pcis = (PC_IS*)(pc->data); 4444 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4445 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4446 PetscInt *idx_R_local=NULL; 4447 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4448 PetscInt vbs,bs; 4449 PetscBT bitmask=NULL; 4450 PetscErrorCode ierr; 4451 4452 PetscFunctionBegin; 4453 /* 4454 No need to setup local scatters if 4455 - primal space is unchanged 4456 AND 4457 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4458 AND 4459 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4460 */ 4461 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4462 PetscFunctionReturn(0); 4463 } 4464 /* destroy old objects */ 4465 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4466 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4467 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4468 /* Set Non-overlapping dimensions */ 4469 n_B = pcis->n_B; 4470 n_D = pcis->n - n_B; 4471 n_vertices = pcbddc->n_vertices; 4472 4473 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4474 4475 /* create auxiliary bitmask and allocate workspace */ 4476 if (!sub_schurs || !sub_schurs->reuse_solver) { 4477 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4478 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4479 for (i=0;i<n_vertices;i++) { 4480 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4481 } 4482 4483 for (i=0, n_R=0; i<pcis->n; i++) { 4484 if (!PetscBTLookup(bitmask,i)) { 4485 idx_R_local[n_R++] = i; 4486 } 4487 } 4488 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4489 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4490 4491 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4492 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4493 } 4494 4495 /* Block code */ 4496 vbs = 1; 4497 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4498 if (bs>1 && !(n_vertices%bs)) { 4499 PetscBool is_blocked = PETSC_TRUE; 4500 PetscInt *vary; 4501 if (!sub_schurs || !sub_schurs->reuse_solver) { 4502 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4503 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4504 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4505 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4506 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4507 for (i=0; i<pcis->n/bs; i++) { 4508 if (vary[i]!=0 && vary[i]!=bs) { 4509 is_blocked = PETSC_FALSE; 4510 break; 4511 } 4512 } 4513 ierr = PetscFree(vary);CHKERRQ(ierr); 4514 } else { 4515 /* Verify directly the R set */ 4516 for (i=0; i<n_R/bs; i++) { 4517 PetscInt j,node=idx_R_local[bs*i]; 4518 for (j=1; j<bs; j++) { 4519 if (node != idx_R_local[bs*i+j]-j) { 4520 is_blocked = PETSC_FALSE; 4521 break; 4522 } 4523 } 4524 } 4525 } 4526 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4527 vbs = bs; 4528 for (i=0;i<n_R/vbs;i++) { 4529 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4530 } 4531 } 4532 } 4533 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4534 if (sub_schurs && sub_schurs->reuse_solver) { 4535 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4536 4537 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4538 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4539 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4540 reuse_solver->is_R = pcbddc->is_R_local; 4541 } else { 4542 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4543 } 4544 4545 /* print some info if requested */ 4546 if (pcbddc->dbg_flag) { 4547 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4548 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4549 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4550 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4551 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4552 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4553 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4554 } 4555 4556 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4557 if (!sub_schurs || !sub_schurs->reuse_solver) { 4558 IS is_aux1,is_aux2; 4559 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4560 4561 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4562 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4563 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4564 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4565 for (i=0; i<n_D; i++) { 4566 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4567 } 4568 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4569 for (i=0, j=0; i<n_R; i++) { 4570 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4571 aux_array1[j++] = i; 4572 } 4573 } 4574 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4575 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4576 for (i=0, j=0; i<n_B; i++) { 4577 if (!PetscBTLookup(bitmask,is_indices[i])) { 4578 aux_array2[j++] = i; 4579 } 4580 } 4581 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4582 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4583 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4584 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4585 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4586 4587 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4588 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4589 for (i=0, j=0; i<n_R; i++) { 4590 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4591 aux_array1[j++] = i; 4592 } 4593 } 4594 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4595 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4596 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4597 } 4598 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4599 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4600 } else { 4601 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4602 IS tis; 4603 PetscInt schur_size; 4604 4605 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4606 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4607 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4608 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4609 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4610 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4611 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4612 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4613 } 4614 } 4615 PetscFunctionReturn(0); 4616 } 4617 4618 4619 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4620 { 4621 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4622 PC_IS *pcis = (PC_IS*)pc->data; 4623 PC pc_temp; 4624 Mat A_RR; 4625 MatReuse reuse; 4626 PetscScalar m_one = -1.0; 4627 PetscReal value; 4628 PetscInt n_D,n_R; 4629 PetscBool check_corr[2],issbaij; 4630 PetscErrorCode ierr; 4631 /* prefixes stuff */ 4632 char dir_prefix[256],neu_prefix[256],str_level[16]; 4633 size_t len; 4634 4635 PetscFunctionBegin; 4636 4637 /* compute prefixes */ 4638 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4639 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4640 if (!pcbddc->current_level) { 4641 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4642 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4643 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4644 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4645 } else { 4646 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4647 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4648 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4649 len -= 15; /* remove "pc_bddc_coarse_" */ 4650 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4651 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4652 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4653 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4654 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4655 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4656 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4657 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4658 } 4659 4660 /* DIRICHLET PROBLEM */ 4661 if (dirichlet) { 4662 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4663 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4664 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4665 if (pcbddc->dbg_flag) { 4666 Mat A_IIn; 4667 4668 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4669 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4670 pcis->A_II = A_IIn; 4671 } 4672 } 4673 if (pcbddc->local_mat->symmetric_set) { 4674 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4675 } 4676 /* Matrix for Dirichlet problem is pcis->A_II */ 4677 n_D = pcis->n - pcis->n_B; 4678 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4679 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4680 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4681 /* default */ 4682 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4683 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4684 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4685 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4686 if (issbaij) { 4687 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4688 } else { 4689 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4690 } 4691 /* Allow user's customization */ 4692 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4693 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4694 } 4695 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4696 if (sub_schurs && sub_schurs->reuse_solver) { 4697 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4698 4699 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4700 } 4701 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4702 if (!n_D) { 4703 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4704 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4705 } 4706 /* Set Up KSP for Dirichlet problem of BDDC */ 4707 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4708 /* set ksp_D into pcis data */ 4709 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4710 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4711 pcis->ksp_D = pcbddc->ksp_D; 4712 } 4713 4714 /* NEUMANN PROBLEM */ 4715 A_RR = 0; 4716 if (neumann) { 4717 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4718 PetscInt ibs,mbs; 4719 PetscBool issbaij; 4720 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4721 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4722 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4723 if (pcbddc->ksp_R) { /* already created ksp */ 4724 PetscInt nn_R; 4725 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4726 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4727 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4728 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4729 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4730 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4731 reuse = MAT_INITIAL_MATRIX; 4732 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4733 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4734 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4735 reuse = MAT_INITIAL_MATRIX; 4736 } else { /* safe to reuse the matrix */ 4737 reuse = MAT_REUSE_MATRIX; 4738 } 4739 } 4740 /* last check */ 4741 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4742 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4743 reuse = MAT_INITIAL_MATRIX; 4744 } 4745 } else { /* first time, so we need to create the matrix */ 4746 reuse = MAT_INITIAL_MATRIX; 4747 } 4748 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4749 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4750 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4751 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4752 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4753 if (matis->A == pcbddc->local_mat) { 4754 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4755 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4756 } else { 4757 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4758 } 4759 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4760 if (matis->A == pcbddc->local_mat) { 4761 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4762 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4763 } else { 4764 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4765 } 4766 } 4767 /* extract A_RR */ 4768 if (sub_schurs && sub_schurs->reuse_solver) { 4769 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4770 4771 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4772 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4773 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4774 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4775 } else { 4776 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4777 } 4778 } else { 4779 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4780 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4781 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4782 } 4783 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4784 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4785 } 4786 if (pcbddc->local_mat->symmetric_set) { 4787 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4788 } 4789 if (!pcbddc->ksp_R) { /* create object if not present */ 4790 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4791 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4792 /* default */ 4793 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4794 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4795 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4796 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4797 if (issbaij) { 4798 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4799 } else { 4800 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4801 } 4802 /* Allow user's customization */ 4803 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4804 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4805 } 4806 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4807 if (!n_R) { 4808 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4809 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4810 } 4811 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4812 /* Reuse solver if it is present */ 4813 if (sub_schurs && sub_schurs->reuse_solver) { 4814 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4815 4816 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4817 } 4818 /* Set Up KSP for Neumann problem of BDDC */ 4819 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4820 } 4821 4822 if (pcbddc->dbg_flag) { 4823 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4824 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4825 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4826 } 4827 4828 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4829 check_corr[0] = check_corr[1] = PETSC_FALSE; 4830 if (pcbddc->NullSpace_corr[0]) { 4831 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4832 } 4833 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4834 check_corr[0] = PETSC_TRUE; 4835 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4836 } 4837 if (neumann && pcbddc->NullSpace_corr[2]) { 4838 check_corr[1] = PETSC_TRUE; 4839 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4840 } 4841 4842 /* check Dirichlet and Neumann solvers */ 4843 if (pcbddc->dbg_flag) { 4844 if (dirichlet) { /* Dirichlet */ 4845 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4846 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4847 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4848 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4849 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4850 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4851 if (check_corr[0]) { 4852 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4853 } 4854 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4855 } 4856 if (neumann) { /* Neumann */ 4857 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4858 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4859 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4860 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4861 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4862 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4863 if (check_corr[1]) { 4864 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4865 } 4866 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4867 } 4868 } 4869 /* free Neumann problem's matrix */ 4870 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4871 PetscFunctionReturn(0); 4872 } 4873 4874 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4875 { 4876 PetscErrorCode ierr; 4877 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4878 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4879 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4880 4881 PetscFunctionBegin; 4882 if (!reuse_solver) { 4883 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4884 } 4885 if (!pcbddc->switch_static) { 4886 if (applytranspose && pcbddc->local_auxmat1) { 4887 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4888 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4889 } 4890 if (!reuse_solver) { 4891 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4892 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4893 } else { 4894 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4895 4896 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4897 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4898 } 4899 } else { 4900 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4901 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4902 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4903 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4904 if (applytranspose && pcbddc->local_auxmat1) { 4905 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4906 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4907 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4908 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4909 } 4910 } 4911 if (!reuse_solver || pcbddc->switch_static) { 4912 if (applytranspose) { 4913 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4914 } else { 4915 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4916 } 4917 } else { 4918 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4919 4920 if (applytranspose) { 4921 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4922 } else { 4923 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4924 } 4925 } 4926 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4927 if (!pcbddc->switch_static) { 4928 if (!reuse_solver) { 4929 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4930 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4931 } else { 4932 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4933 4934 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4935 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4936 } 4937 if (!applytranspose && pcbddc->local_auxmat1) { 4938 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4939 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4940 } 4941 } else { 4942 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4943 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4944 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4945 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4946 if (!applytranspose && pcbddc->local_auxmat1) { 4947 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4948 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4949 } 4950 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4951 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4952 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4953 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4954 } 4955 PetscFunctionReturn(0); 4956 } 4957 4958 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4959 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4960 { 4961 PetscErrorCode ierr; 4962 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4963 PC_IS* pcis = (PC_IS*) (pc->data); 4964 const PetscScalar zero = 0.0; 4965 4966 PetscFunctionBegin; 4967 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4968 if (!pcbddc->benign_apply_coarse_only) { 4969 if (applytranspose) { 4970 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4971 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4972 } else { 4973 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4974 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4975 } 4976 } else { 4977 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4978 } 4979 4980 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4981 if (pcbddc->benign_n) { 4982 PetscScalar *array; 4983 PetscInt j; 4984 4985 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4986 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4987 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4988 } 4989 4990 /* start communications from local primal nodes to rhs of coarse solver */ 4991 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4992 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4993 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4994 4995 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4996 if (pcbddc->coarse_ksp) { 4997 Mat coarse_mat; 4998 Vec rhs,sol; 4999 MatNullSpace nullsp; 5000 PetscBool isbddc = PETSC_FALSE; 5001 5002 if (pcbddc->benign_have_null) { 5003 PC coarse_pc; 5004 5005 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5006 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5007 /* we need to propagate to coarser levels the need for a possible benign correction */ 5008 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5009 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5010 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5011 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5012 } 5013 } 5014 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5015 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5016 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5017 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5018 if (nullsp) { 5019 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5020 } 5021 if (applytranspose) { 5022 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5023 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5024 } else { 5025 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5026 PC coarse_pc; 5027 5028 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5029 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5030 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5031 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5032 } else { 5033 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5034 } 5035 } 5036 /* we don't need the benign correction at coarser levels anymore */ 5037 if (pcbddc->benign_have_null && isbddc) { 5038 PC coarse_pc; 5039 PC_BDDC* coarsepcbddc; 5040 5041 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5042 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5043 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5044 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5045 } 5046 if (nullsp) { 5047 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5048 } 5049 } 5050 5051 /* Local solution on R nodes */ 5052 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5053 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5054 } 5055 /* communications from coarse sol to local primal nodes */ 5056 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5057 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5058 5059 /* Sum contributions from the two levels */ 5060 if (!pcbddc->benign_apply_coarse_only) { 5061 if (applytranspose) { 5062 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5063 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5064 } else { 5065 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5066 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5067 } 5068 /* store p0 */ 5069 if (pcbddc->benign_n) { 5070 PetscScalar *array; 5071 PetscInt j; 5072 5073 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5074 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5075 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5076 } 5077 } else { /* expand the coarse solution */ 5078 if (applytranspose) { 5079 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5080 } else { 5081 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5082 } 5083 } 5084 PetscFunctionReturn(0); 5085 } 5086 5087 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5088 { 5089 PetscErrorCode ierr; 5090 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5091 PetscScalar *array; 5092 Vec from,to; 5093 5094 PetscFunctionBegin; 5095 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5096 from = pcbddc->coarse_vec; 5097 to = pcbddc->vec1_P; 5098 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5099 Vec tvec; 5100 5101 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5102 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5103 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5104 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5105 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5106 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5107 } 5108 } else { /* from local to global -> put data in coarse right hand side */ 5109 from = pcbddc->vec1_P; 5110 to = pcbddc->coarse_vec; 5111 } 5112 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5113 PetscFunctionReturn(0); 5114 } 5115 5116 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5117 { 5118 PetscErrorCode ierr; 5119 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5120 PetscScalar *array; 5121 Vec from,to; 5122 5123 PetscFunctionBegin; 5124 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5125 from = pcbddc->coarse_vec; 5126 to = pcbddc->vec1_P; 5127 } else { /* from local to global -> put data in coarse right hand side */ 5128 from = pcbddc->vec1_P; 5129 to = pcbddc->coarse_vec; 5130 } 5131 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5132 if (smode == SCATTER_FORWARD) { 5133 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5134 Vec tvec; 5135 5136 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5137 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5138 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5139 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5140 } 5141 } else { 5142 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5143 ierr = VecResetArray(from);CHKERRQ(ierr); 5144 } 5145 } 5146 PetscFunctionReturn(0); 5147 } 5148 5149 /* uncomment for testing purposes */ 5150 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5151 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5152 { 5153 PetscErrorCode ierr; 5154 PC_IS* pcis = (PC_IS*)(pc->data); 5155 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5156 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5157 /* one and zero */ 5158 PetscScalar one=1.0,zero=0.0; 5159 /* space to store constraints and their local indices */ 5160 PetscScalar *constraints_data; 5161 PetscInt *constraints_idxs,*constraints_idxs_B; 5162 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5163 PetscInt *constraints_n; 5164 /* iterators */ 5165 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5166 /* BLAS integers */ 5167 PetscBLASInt lwork,lierr; 5168 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5169 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5170 /* reuse */ 5171 PetscInt olocal_primal_size,olocal_primal_size_cc; 5172 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5173 /* change of basis */ 5174 PetscBool qr_needed; 5175 PetscBT change_basis,qr_needed_idx; 5176 /* auxiliary stuff */ 5177 PetscInt *nnz,*is_indices; 5178 PetscInt ncc; 5179 /* some quantities */ 5180 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5181 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5182 5183 PetscFunctionBegin; 5184 /* Destroy Mat objects computed previously */ 5185 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5186 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5187 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5188 /* save info on constraints from previous setup (if any) */ 5189 olocal_primal_size = pcbddc->local_primal_size; 5190 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5191 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5192 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5193 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5194 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5195 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5196 5197 if (!pcbddc->adaptive_selection) { 5198 IS ISForVertices,*ISForFaces,*ISForEdges; 5199 MatNullSpace nearnullsp; 5200 const Vec *nearnullvecs; 5201 Vec *localnearnullsp; 5202 PetscScalar *array; 5203 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5204 PetscBool nnsp_has_cnst; 5205 /* LAPACK working arrays for SVD or POD */ 5206 PetscBool skip_lapack,boolforchange; 5207 PetscScalar *work; 5208 PetscReal *singular_vals; 5209 #if defined(PETSC_USE_COMPLEX) 5210 PetscReal *rwork; 5211 #endif 5212 #if defined(PETSC_MISSING_LAPACK_GESVD) 5213 PetscScalar *temp_basis,*correlation_mat; 5214 #else 5215 PetscBLASInt dummy_int=1; 5216 PetscScalar dummy_scalar=1.; 5217 #endif 5218 5219 /* Get index sets for faces, edges and vertices from graph */ 5220 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5221 /* print some info */ 5222 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5223 PetscInt nv; 5224 5225 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5226 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5227 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5228 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5229 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5230 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5231 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5232 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5233 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5234 } 5235 5236 /* free unneeded index sets */ 5237 if (!pcbddc->use_vertices) { 5238 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5239 } 5240 if (!pcbddc->use_edges) { 5241 for (i=0;i<n_ISForEdges;i++) { 5242 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5243 } 5244 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5245 n_ISForEdges = 0; 5246 } 5247 if (!pcbddc->use_faces) { 5248 for (i=0;i<n_ISForFaces;i++) { 5249 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5250 } 5251 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5252 n_ISForFaces = 0; 5253 } 5254 5255 /* check if near null space is attached to global mat */ 5256 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5257 if (nearnullsp) { 5258 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5259 /* remove any stored info */ 5260 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5261 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5262 /* store information for BDDC solver reuse */ 5263 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5264 pcbddc->onearnullspace = nearnullsp; 5265 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5266 for (i=0;i<nnsp_size;i++) { 5267 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5268 } 5269 } else { /* if near null space is not provided BDDC uses constants by default */ 5270 nnsp_size = 0; 5271 nnsp_has_cnst = PETSC_TRUE; 5272 } 5273 /* get max number of constraints on a single cc */ 5274 max_constraints = nnsp_size; 5275 if (nnsp_has_cnst) max_constraints++; 5276 5277 /* 5278 Evaluate maximum storage size needed by the procedure 5279 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5280 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5281 There can be multiple constraints per connected component 5282 */ 5283 n_vertices = 0; 5284 if (ISForVertices) { 5285 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5286 } 5287 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5288 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5289 5290 total_counts = n_ISForFaces+n_ISForEdges; 5291 total_counts *= max_constraints; 5292 total_counts += n_vertices; 5293 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5294 5295 total_counts = 0; 5296 max_size_of_constraint = 0; 5297 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5298 IS used_is; 5299 if (i<n_ISForEdges) { 5300 used_is = ISForEdges[i]; 5301 } else { 5302 used_is = ISForFaces[i-n_ISForEdges]; 5303 } 5304 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5305 total_counts += j; 5306 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5307 } 5308 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5309 5310 /* get local part of global near null space vectors */ 5311 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5312 for (k=0;k<nnsp_size;k++) { 5313 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5314 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5315 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5316 } 5317 5318 /* whether or not to skip lapack calls */ 5319 skip_lapack = PETSC_TRUE; 5320 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5321 5322 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5323 if (!skip_lapack) { 5324 PetscScalar temp_work; 5325 5326 #if defined(PETSC_MISSING_LAPACK_GESVD) 5327 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5328 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5329 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5330 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5331 #if defined(PETSC_USE_COMPLEX) 5332 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5333 #endif 5334 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5335 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5336 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5337 lwork = -1; 5338 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5339 #if !defined(PETSC_USE_COMPLEX) 5340 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5341 #else 5342 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5343 #endif 5344 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5345 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5346 #else /* on missing GESVD */ 5347 /* SVD */ 5348 PetscInt max_n,min_n; 5349 max_n = max_size_of_constraint; 5350 min_n = max_constraints; 5351 if (max_size_of_constraint < max_constraints) { 5352 min_n = max_size_of_constraint; 5353 max_n = max_constraints; 5354 } 5355 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5356 #if defined(PETSC_USE_COMPLEX) 5357 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5358 #endif 5359 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5360 lwork = -1; 5361 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5362 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5363 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5364 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5365 #if !defined(PETSC_USE_COMPLEX) 5366 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5367 #else 5368 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5369 #endif 5370 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5371 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5372 #endif /* on missing GESVD */ 5373 /* Allocate optimal workspace */ 5374 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5375 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5376 } 5377 /* Now we can loop on constraining sets */ 5378 total_counts = 0; 5379 constraints_idxs_ptr[0] = 0; 5380 constraints_data_ptr[0] = 0; 5381 /* vertices */ 5382 if (n_vertices) { 5383 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5384 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5385 for (i=0;i<n_vertices;i++) { 5386 constraints_n[total_counts] = 1; 5387 constraints_data[total_counts] = 1.0; 5388 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5389 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5390 total_counts++; 5391 } 5392 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5393 n_vertices = total_counts; 5394 } 5395 5396 /* edges and faces */ 5397 total_counts_cc = total_counts; 5398 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5399 IS used_is; 5400 PetscBool idxs_copied = PETSC_FALSE; 5401 5402 if (ncc<n_ISForEdges) { 5403 used_is = ISForEdges[ncc]; 5404 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5405 } else { 5406 used_is = ISForFaces[ncc-n_ISForEdges]; 5407 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5408 } 5409 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5410 5411 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5412 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5413 /* change of basis should not be performed on local periodic nodes */ 5414 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5415 if (nnsp_has_cnst) { 5416 PetscScalar quad_value; 5417 5418 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5419 idxs_copied = PETSC_TRUE; 5420 5421 if (!pcbddc->use_nnsp_true) { 5422 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5423 } else { 5424 quad_value = 1.0; 5425 } 5426 for (j=0;j<size_of_constraint;j++) { 5427 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5428 } 5429 temp_constraints++; 5430 total_counts++; 5431 } 5432 for (k=0;k<nnsp_size;k++) { 5433 PetscReal real_value; 5434 PetscScalar *ptr_to_data; 5435 5436 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5437 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5438 for (j=0;j<size_of_constraint;j++) { 5439 ptr_to_data[j] = array[is_indices[j]]; 5440 } 5441 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5442 /* check if array is null on the connected component */ 5443 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5444 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5445 if (real_value > 0.0) { /* keep indices and values */ 5446 temp_constraints++; 5447 total_counts++; 5448 if (!idxs_copied) { 5449 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5450 idxs_copied = PETSC_TRUE; 5451 } 5452 } 5453 } 5454 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5455 valid_constraints = temp_constraints; 5456 if (!pcbddc->use_nnsp_true && temp_constraints) { 5457 if (temp_constraints == 1) { /* just normalize the constraint */ 5458 PetscScalar norm,*ptr_to_data; 5459 5460 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5461 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5462 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5463 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5464 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5465 } else { /* perform SVD */ 5466 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5467 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5468 5469 #if defined(PETSC_MISSING_LAPACK_GESVD) 5470 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5471 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5472 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5473 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5474 from that computed using LAPACKgesvd 5475 -> This is due to a different computation of eigenvectors in LAPACKheev 5476 -> The quality of the POD-computed basis will be the same */ 5477 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5478 /* Store upper triangular part of correlation matrix */ 5479 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5480 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5481 for (j=0;j<temp_constraints;j++) { 5482 for (k=0;k<j+1;k++) { 5483 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5484 } 5485 } 5486 /* compute eigenvalues and eigenvectors of correlation matrix */ 5487 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5488 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5489 #if !defined(PETSC_USE_COMPLEX) 5490 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5491 #else 5492 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5493 #endif 5494 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5495 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5496 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5497 j = 0; 5498 while (j < temp_constraints && singular_vals[j] < tol) j++; 5499 total_counts = total_counts-j; 5500 valid_constraints = temp_constraints-j; 5501 /* scale and copy POD basis into used quadrature memory */ 5502 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5503 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5504 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5505 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5506 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5507 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5508 if (j<temp_constraints) { 5509 PetscInt ii; 5510 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5511 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5512 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5513 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5514 for (k=0;k<temp_constraints-j;k++) { 5515 for (ii=0;ii<size_of_constraint;ii++) { 5516 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5517 } 5518 } 5519 } 5520 #else /* on missing GESVD */ 5521 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5522 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5523 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5524 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5525 #if !defined(PETSC_USE_COMPLEX) 5526 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5527 #else 5528 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5529 #endif 5530 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5531 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5532 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5533 k = temp_constraints; 5534 if (k > size_of_constraint) k = size_of_constraint; 5535 j = 0; 5536 while (j < k && singular_vals[k-j-1] < tol) j++; 5537 valid_constraints = k-j; 5538 total_counts = total_counts-temp_constraints+valid_constraints; 5539 #endif /* on missing GESVD */ 5540 } 5541 } 5542 /* update pointers information */ 5543 if (valid_constraints) { 5544 constraints_n[total_counts_cc] = valid_constraints; 5545 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5546 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5547 /* set change_of_basis flag */ 5548 if (boolforchange) { 5549 PetscBTSet(change_basis,total_counts_cc); 5550 } 5551 total_counts_cc++; 5552 } 5553 } 5554 /* free workspace */ 5555 if (!skip_lapack) { 5556 ierr = PetscFree(work);CHKERRQ(ierr); 5557 #if defined(PETSC_USE_COMPLEX) 5558 ierr = PetscFree(rwork);CHKERRQ(ierr); 5559 #endif 5560 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5561 #if defined(PETSC_MISSING_LAPACK_GESVD) 5562 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5563 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5564 #endif 5565 } 5566 for (k=0;k<nnsp_size;k++) { 5567 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5568 } 5569 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5570 /* free index sets of faces, edges and vertices */ 5571 for (i=0;i<n_ISForFaces;i++) { 5572 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5573 } 5574 if (n_ISForFaces) { 5575 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5576 } 5577 for (i=0;i<n_ISForEdges;i++) { 5578 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5579 } 5580 if (n_ISForEdges) { 5581 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5582 } 5583 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5584 } else { 5585 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5586 5587 total_counts = 0; 5588 n_vertices = 0; 5589 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5590 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5591 } 5592 max_constraints = 0; 5593 total_counts_cc = 0; 5594 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5595 total_counts += pcbddc->adaptive_constraints_n[i]; 5596 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5597 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5598 } 5599 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5600 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5601 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5602 constraints_data = pcbddc->adaptive_constraints_data; 5603 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5604 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5605 total_counts_cc = 0; 5606 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5607 if (pcbddc->adaptive_constraints_n[i]) { 5608 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5609 } 5610 } 5611 #if 0 5612 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5613 for (i=0;i<total_counts_cc;i++) { 5614 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5615 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5616 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5617 printf(" %d",constraints_idxs[j]); 5618 } 5619 printf("\n"); 5620 printf("number of cc: %d\n",constraints_n[i]); 5621 } 5622 for (i=0;i<n_vertices;i++) { 5623 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5624 } 5625 for (i=0;i<sub_schurs->n_subs;i++) { 5626 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5627 } 5628 #endif 5629 5630 max_size_of_constraint = 0; 5631 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5632 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5633 /* Change of basis */ 5634 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5635 if (pcbddc->use_change_of_basis) { 5636 for (i=0;i<sub_schurs->n_subs;i++) { 5637 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5638 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5639 } 5640 } 5641 } 5642 } 5643 pcbddc->local_primal_size = total_counts; 5644 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5645 5646 /* map constraints_idxs in boundary numbering */ 5647 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5648 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5649 5650 /* Create constraint matrix */ 5651 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5652 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5653 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5654 5655 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5656 /* determine if a QR strategy is needed for change of basis */ 5657 qr_needed = PETSC_FALSE; 5658 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5659 total_primal_vertices=0; 5660 pcbddc->local_primal_size_cc = 0; 5661 for (i=0;i<total_counts_cc;i++) { 5662 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5663 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5664 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5665 pcbddc->local_primal_size_cc += 1; 5666 } else if (PetscBTLookup(change_basis,i)) { 5667 for (k=0;k<constraints_n[i];k++) { 5668 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5669 } 5670 pcbddc->local_primal_size_cc += constraints_n[i]; 5671 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5672 PetscBTSet(qr_needed_idx,i); 5673 qr_needed = PETSC_TRUE; 5674 } 5675 } else { 5676 pcbddc->local_primal_size_cc += 1; 5677 } 5678 } 5679 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5680 pcbddc->n_vertices = total_primal_vertices; 5681 /* permute indices in order to have a sorted set of vertices */ 5682 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5683 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5684 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5685 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5686 5687 /* nonzero structure of constraint matrix */ 5688 /* and get reference dof for local constraints */ 5689 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5690 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5691 5692 j = total_primal_vertices; 5693 total_counts = total_primal_vertices; 5694 cum = total_primal_vertices; 5695 for (i=n_vertices;i<total_counts_cc;i++) { 5696 if (!PetscBTLookup(change_basis,i)) { 5697 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5698 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5699 cum++; 5700 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5701 for (k=0;k<constraints_n[i];k++) { 5702 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5703 nnz[j+k] = size_of_constraint; 5704 } 5705 j += constraints_n[i]; 5706 } 5707 } 5708 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5709 ierr = PetscFree(nnz);CHKERRQ(ierr); 5710 5711 /* set values in constraint matrix */ 5712 for (i=0;i<total_primal_vertices;i++) { 5713 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5714 } 5715 total_counts = total_primal_vertices; 5716 for (i=n_vertices;i<total_counts_cc;i++) { 5717 if (!PetscBTLookup(change_basis,i)) { 5718 PetscInt *cols; 5719 5720 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5721 cols = constraints_idxs+constraints_idxs_ptr[i]; 5722 for (k=0;k<constraints_n[i];k++) { 5723 PetscInt row = total_counts+k; 5724 PetscScalar *vals; 5725 5726 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5727 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5728 } 5729 total_counts += constraints_n[i]; 5730 } 5731 } 5732 /* assembling */ 5733 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5734 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5735 5736 /* 5737 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5738 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5739 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5740 */ 5741 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5742 if (pcbddc->use_change_of_basis) { 5743 /* dual and primal dofs on a single cc */ 5744 PetscInt dual_dofs,primal_dofs; 5745 /* working stuff for GEQRF */ 5746 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5747 PetscBLASInt lqr_work; 5748 /* working stuff for UNGQR */ 5749 PetscScalar *gqr_work,lgqr_work_t; 5750 PetscBLASInt lgqr_work; 5751 /* working stuff for TRTRS */ 5752 PetscScalar *trs_rhs; 5753 PetscBLASInt Blas_NRHS; 5754 /* pointers for values insertion into change of basis matrix */ 5755 PetscInt *start_rows,*start_cols; 5756 PetscScalar *start_vals; 5757 /* working stuff for values insertion */ 5758 PetscBT is_primal; 5759 PetscInt *aux_primal_numbering_B; 5760 /* matrix sizes */ 5761 PetscInt global_size,local_size; 5762 /* temporary change of basis */ 5763 Mat localChangeOfBasisMatrix; 5764 /* extra space for debugging */ 5765 PetscScalar *dbg_work; 5766 5767 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5768 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5769 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5770 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5771 /* nonzeros for local mat */ 5772 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5773 if (!pcbddc->benign_change || pcbddc->fake_change) { 5774 for (i=0;i<pcis->n;i++) nnz[i]=1; 5775 } else { 5776 const PetscInt *ii; 5777 PetscInt n; 5778 PetscBool flg_row; 5779 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5780 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5781 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5782 } 5783 for (i=n_vertices;i<total_counts_cc;i++) { 5784 if (PetscBTLookup(change_basis,i)) { 5785 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5786 if (PetscBTLookup(qr_needed_idx,i)) { 5787 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5788 } else { 5789 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5790 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5791 } 5792 } 5793 } 5794 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5795 ierr = PetscFree(nnz);CHKERRQ(ierr); 5796 /* Set interior change in the matrix */ 5797 if (!pcbddc->benign_change || pcbddc->fake_change) { 5798 for (i=0;i<pcis->n;i++) { 5799 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5800 } 5801 } else { 5802 const PetscInt *ii,*jj; 5803 PetscScalar *aa; 5804 PetscInt n; 5805 PetscBool flg_row; 5806 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5807 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5808 for (i=0;i<n;i++) { 5809 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5810 } 5811 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5812 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5813 } 5814 5815 if (pcbddc->dbg_flag) { 5816 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5817 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5818 } 5819 5820 5821 /* Now we loop on the constraints which need a change of basis */ 5822 /* 5823 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5824 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5825 5826 Basic blocks of change of basis matrix T computed by 5827 5828 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5829 5830 | 1 0 ... 0 s_1/S | 5831 | 0 1 ... 0 s_2/S | 5832 | ... | 5833 | 0 ... 1 s_{n-1}/S | 5834 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5835 5836 with S = \sum_{i=1}^n s_i^2 5837 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5838 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5839 5840 - QR decomposition of constraints otherwise 5841 */ 5842 if (qr_needed) { 5843 /* space to store Q */ 5844 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5845 /* array to store scaling factors for reflectors */ 5846 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5847 /* first we issue queries for optimal work */ 5848 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5849 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5850 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5851 lqr_work = -1; 5852 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5853 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5854 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5855 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5856 lgqr_work = -1; 5857 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5858 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5859 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5860 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5861 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5862 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5863 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5864 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5865 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5866 /* array to store rhs and solution of triangular solver */ 5867 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5868 /* allocating workspace for check */ 5869 if (pcbddc->dbg_flag) { 5870 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5871 } 5872 } 5873 /* array to store whether a node is primal or not */ 5874 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5875 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5876 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5877 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5878 for (i=0;i<total_primal_vertices;i++) { 5879 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5880 } 5881 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5882 5883 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5884 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5885 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5886 if (PetscBTLookup(change_basis,total_counts)) { 5887 /* get constraint info */ 5888 primal_dofs = constraints_n[total_counts]; 5889 dual_dofs = size_of_constraint-primal_dofs; 5890 5891 if (pcbddc->dbg_flag) { 5892 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5893 } 5894 5895 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5896 5897 /* copy quadrature constraints for change of basis check */ 5898 if (pcbddc->dbg_flag) { 5899 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5900 } 5901 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5902 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5903 5904 /* compute QR decomposition of constraints */ 5905 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5906 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5907 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5908 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5909 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5910 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5911 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5912 5913 /* explictly compute R^-T */ 5914 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5915 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5916 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5917 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5918 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5919 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5920 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5921 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5922 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5923 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5924 5925 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5926 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5927 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5928 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5929 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5930 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5931 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5932 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5933 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5934 5935 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5936 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5937 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5938 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5939 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5940 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5941 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5942 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5943 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5944 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5945 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5946 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5947 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5948 5949 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5950 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5951 /* insert cols for primal dofs */ 5952 for (j=0;j<primal_dofs;j++) { 5953 start_vals = &qr_basis[j*size_of_constraint]; 5954 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5955 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5956 } 5957 /* insert cols for dual dofs */ 5958 for (j=0,k=0;j<dual_dofs;k++) { 5959 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5960 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5961 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5962 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5963 j++; 5964 } 5965 } 5966 5967 /* check change of basis */ 5968 if (pcbddc->dbg_flag) { 5969 PetscInt ii,jj; 5970 PetscBool valid_qr=PETSC_TRUE; 5971 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5972 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5973 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5974 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5975 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5976 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5977 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5978 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5979 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5980 for (jj=0;jj<size_of_constraint;jj++) { 5981 for (ii=0;ii<primal_dofs;ii++) { 5982 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5983 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5984 } 5985 } 5986 if (!valid_qr) { 5987 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5988 for (jj=0;jj<size_of_constraint;jj++) { 5989 for (ii=0;ii<primal_dofs;ii++) { 5990 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5991 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5992 } 5993 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5994 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5995 } 5996 } 5997 } 5998 } else { 5999 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6000 } 6001 } 6002 } else { /* simple transformation block */ 6003 PetscInt row,col; 6004 PetscScalar val,norm; 6005 6006 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6007 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6008 for (j=0;j<size_of_constraint;j++) { 6009 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6010 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6011 if (!PetscBTLookup(is_primal,row_B)) { 6012 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6013 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6014 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6015 } else { 6016 for (k=0;k<size_of_constraint;k++) { 6017 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6018 if (row != col) { 6019 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6020 } else { 6021 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6022 } 6023 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6024 } 6025 } 6026 } 6027 if (pcbddc->dbg_flag) { 6028 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6029 } 6030 } 6031 } else { 6032 if (pcbddc->dbg_flag) { 6033 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6034 } 6035 } 6036 } 6037 6038 /* free workspace */ 6039 if (qr_needed) { 6040 if (pcbddc->dbg_flag) { 6041 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6042 } 6043 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6044 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6045 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6046 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6047 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6048 } 6049 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6050 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6051 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6052 6053 /* assembling of global change of variable */ 6054 if (!pcbddc->fake_change) { 6055 Mat tmat; 6056 PetscInt bs; 6057 6058 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6059 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6060 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6061 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6062 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6063 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6064 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6065 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6066 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6067 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6068 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6069 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6070 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6071 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6072 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6073 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6074 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6075 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6076 6077 /* check */ 6078 if (pcbddc->dbg_flag) { 6079 PetscReal error; 6080 Vec x,x_change; 6081 6082 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6083 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6084 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6085 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6086 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6087 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6088 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6089 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6090 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6091 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6092 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6093 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6094 if (error > PETSC_SMALL) { 6095 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6096 } 6097 ierr = VecDestroy(&x);CHKERRQ(ierr); 6098 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6099 } 6100 /* adapt sub_schurs computed (if any) */ 6101 if (pcbddc->use_deluxe_scaling) { 6102 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6103 6104 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 6105 if (sub_schurs && sub_schurs->S_Ej_all) { 6106 Mat S_new,tmat; 6107 IS is_all_N,is_V_Sall = NULL; 6108 6109 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6110 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6111 if (pcbddc->deluxe_zerorows) { 6112 ISLocalToGlobalMapping NtoSall; 6113 IS is_V; 6114 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6115 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6116 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6117 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6118 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6119 } 6120 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6121 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6122 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6123 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6124 if (pcbddc->deluxe_zerorows) { 6125 const PetscScalar *array; 6126 const PetscInt *idxs_V,*idxs_all; 6127 PetscInt i,n_V; 6128 6129 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6130 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6131 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6132 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6133 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6134 for (i=0;i<n_V;i++) { 6135 PetscScalar val; 6136 PetscInt idx; 6137 6138 idx = idxs_V[i]; 6139 val = array[idxs_all[idxs_V[i]]]; 6140 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6141 } 6142 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6143 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6144 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6145 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6146 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6147 } 6148 sub_schurs->S_Ej_all = S_new; 6149 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6150 if (sub_schurs->sum_S_Ej_all) { 6151 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6152 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6153 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6154 if (pcbddc->deluxe_zerorows) { 6155 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6156 } 6157 sub_schurs->sum_S_Ej_all = S_new; 6158 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6159 } 6160 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6161 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6162 } 6163 /* destroy any change of basis context in sub_schurs */ 6164 if (sub_schurs && sub_schurs->change) { 6165 PetscInt i; 6166 6167 for (i=0;i<sub_schurs->n_subs;i++) { 6168 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6169 } 6170 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6171 } 6172 } 6173 if (pcbddc->switch_static) { /* need to save the local change */ 6174 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6175 } else { 6176 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6177 } 6178 /* determine if any process has changed the pressures locally */ 6179 pcbddc->change_interior = pcbddc->benign_have_null; 6180 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6181 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6182 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6183 pcbddc->use_qr_single = qr_needed; 6184 } 6185 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6186 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6187 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6188 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6189 } else { 6190 Mat benign_global = NULL; 6191 if (pcbddc->benign_have_null) { 6192 Mat tmat; 6193 6194 pcbddc->change_interior = PETSC_TRUE; 6195 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6196 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6197 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6198 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6199 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6200 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6201 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6202 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6203 if (pcbddc->benign_change) { 6204 Mat M; 6205 6206 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6207 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6208 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6209 ierr = MatDestroy(&M);CHKERRQ(ierr); 6210 } else { 6211 Mat eye; 6212 PetscScalar *array; 6213 6214 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6215 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6216 for (i=0;i<pcis->n;i++) { 6217 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6218 } 6219 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6220 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6221 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6222 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6223 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6224 } 6225 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6226 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6227 } 6228 if (pcbddc->user_ChangeOfBasisMatrix) { 6229 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6230 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6231 } else if (pcbddc->benign_have_null) { 6232 pcbddc->ChangeOfBasisMatrix = benign_global; 6233 } 6234 } 6235 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6236 IS is_global; 6237 const PetscInt *gidxs; 6238 6239 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6240 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6241 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6242 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6243 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6244 } 6245 } 6246 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6247 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6248 } 6249 6250 if (!pcbddc->fake_change) { 6251 /* add pressure dofs to set of primal nodes for numbering purposes */ 6252 for (i=0;i<pcbddc->benign_n;i++) { 6253 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6254 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6255 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6256 pcbddc->local_primal_size_cc++; 6257 pcbddc->local_primal_size++; 6258 } 6259 6260 /* check if a new primal space has been introduced (also take into account benign trick) */ 6261 pcbddc->new_primal_space_local = PETSC_TRUE; 6262 if (olocal_primal_size == pcbddc->local_primal_size) { 6263 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6264 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6265 if (!pcbddc->new_primal_space_local) { 6266 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6267 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6268 } 6269 } 6270 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6271 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6272 } 6273 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6274 6275 /* flush dbg viewer */ 6276 if (pcbddc->dbg_flag) { 6277 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6278 } 6279 6280 /* free workspace */ 6281 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6282 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6283 if (!pcbddc->adaptive_selection) { 6284 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6285 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6286 } else { 6287 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6288 pcbddc->adaptive_constraints_idxs_ptr, 6289 pcbddc->adaptive_constraints_data_ptr, 6290 pcbddc->adaptive_constraints_idxs, 6291 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6292 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6293 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6294 } 6295 PetscFunctionReturn(0); 6296 } 6297 6298 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6299 { 6300 ISLocalToGlobalMapping map; 6301 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6302 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6303 PetscInt i,N; 6304 PetscBool rcsr = PETSC_FALSE; 6305 PetscErrorCode ierr; 6306 6307 PetscFunctionBegin; 6308 if (pcbddc->recompute_topography) { 6309 pcbddc->graphanalyzed = PETSC_FALSE; 6310 /* Reset previously computed graph */ 6311 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6312 /* Init local Graph struct */ 6313 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6314 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6315 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6316 6317 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6318 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6319 } 6320 /* Check validity of the csr graph passed in by the user */ 6321 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6322 6323 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6324 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6325 PetscInt *xadj,*adjncy; 6326 PetscInt nvtxs; 6327 PetscBool flg_row=PETSC_FALSE; 6328 6329 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6330 if (flg_row) { 6331 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6332 pcbddc->computed_rowadj = PETSC_TRUE; 6333 } 6334 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6335 rcsr = PETSC_TRUE; 6336 } 6337 if (pcbddc->dbg_flag) { 6338 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6339 } 6340 6341 /* Setup of Graph */ 6342 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6343 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6344 6345 /* attach info on disconnected subdomains if present */ 6346 if (pcbddc->n_local_subs) { 6347 PetscInt *local_subs; 6348 6349 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6350 for (i=0;i<pcbddc->n_local_subs;i++) { 6351 const PetscInt *idxs; 6352 PetscInt nl,j; 6353 6354 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6355 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6356 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6357 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6358 } 6359 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6360 pcbddc->mat_graph->local_subs = local_subs; 6361 } 6362 } 6363 6364 if (!pcbddc->graphanalyzed) { 6365 /* Graph's connected components analysis */ 6366 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6367 pcbddc->graphanalyzed = PETSC_TRUE; 6368 } 6369 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6370 PetscFunctionReturn(0); 6371 } 6372 6373 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6374 { 6375 PetscInt i,j; 6376 PetscScalar *alphas; 6377 PetscErrorCode ierr; 6378 6379 PetscFunctionBegin; 6380 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6381 for (i=0;i<n;i++) { 6382 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6383 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6384 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6385 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6386 } 6387 ierr = PetscFree(alphas);CHKERRQ(ierr); 6388 PetscFunctionReturn(0); 6389 } 6390 6391 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6392 { 6393 Mat A; 6394 PetscInt n_neighs,*neighs,*n_shared,**shared; 6395 PetscMPIInt size,rank,color; 6396 PetscInt *xadj,*adjncy; 6397 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6398 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6399 PetscInt void_procs,*procs_candidates = NULL; 6400 PetscInt xadj_count,*count; 6401 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6402 PetscSubcomm psubcomm; 6403 MPI_Comm subcomm; 6404 PetscErrorCode ierr; 6405 6406 PetscFunctionBegin; 6407 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6408 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6409 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6410 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6411 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6412 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6413 6414 if (have_void) *have_void = PETSC_FALSE; 6415 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6416 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6417 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6418 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6419 im_active = !!n; 6420 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6421 void_procs = size - active_procs; 6422 /* get ranks of of non-active processes in mat communicator */ 6423 if (void_procs) { 6424 PetscInt ncand; 6425 6426 if (have_void) *have_void = PETSC_TRUE; 6427 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6428 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6429 for (i=0,ncand=0;i<size;i++) { 6430 if (!procs_candidates[i]) { 6431 procs_candidates[ncand++] = i; 6432 } 6433 } 6434 /* force n_subdomains to be not greater that the number of non-active processes */ 6435 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6436 } 6437 6438 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6439 number of subdomains requested 1 -> send to master or first candidate in voids */ 6440 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6441 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6442 PetscInt issize,isidx,dest; 6443 if (*n_subdomains == 1) dest = 0; 6444 else dest = rank; 6445 if (im_active) { 6446 issize = 1; 6447 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6448 isidx = procs_candidates[dest]; 6449 } else { 6450 isidx = dest; 6451 } 6452 } else { 6453 issize = 0; 6454 isidx = -1; 6455 } 6456 if (*n_subdomains != 1) *n_subdomains = active_procs; 6457 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6458 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6459 PetscFunctionReturn(0); 6460 } 6461 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6462 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6463 threshold = PetscMax(threshold,2); 6464 6465 /* Get info on mapping */ 6466 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6467 6468 /* build local CSR graph of subdomains' connectivity */ 6469 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6470 xadj[0] = 0; 6471 xadj[1] = PetscMax(n_neighs-1,0); 6472 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6473 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6474 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6475 for (i=1;i<n_neighs;i++) 6476 for (j=0;j<n_shared[i];j++) 6477 count[shared[i][j]] += 1; 6478 6479 xadj_count = 0; 6480 for (i=1;i<n_neighs;i++) { 6481 for (j=0;j<n_shared[i];j++) { 6482 if (count[shared[i][j]] < threshold) { 6483 adjncy[xadj_count] = neighs[i]; 6484 adjncy_wgt[xadj_count] = n_shared[i]; 6485 xadj_count++; 6486 break; 6487 } 6488 } 6489 } 6490 xadj[1] = xadj_count; 6491 ierr = PetscFree(count);CHKERRQ(ierr); 6492 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6493 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6494 6495 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6496 6497 /* Restrict work on active processes only */ 6498 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6499 if (void_procs) { 6500 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6501 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6502 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6503 subcomm = PetscSubcommChild(psubcomm); 6504 } else { 6505 psubcomm = NULL; 6506 subcomm = PetscObjectComm((PetscObject)mat); 6507 } 6508 6509 v_wgt = NULL; 6510 if (!color) { 6511 ierr = PetscFree(xadj);CHKERRQ(ierr); 6512 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6513 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6514 } else { 6515 Mat subdomain_adj; 6516 IS new_ranks,new_ranks_contig; 6517 MatPartitioning partitioner; 6518 PetscInt rstart=0,rend=0; 6519 PetscInt *is_indices,*oldranks; 6520 PetscMPIInt size; 6521 PetscBool aggregate; 6522 6523 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6524 if (void_procs) { 6525 PetscInt prank = rank; 6526 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6527 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6528 for (i=0;i<xadj[1];i++) { 6529 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6530 } 6531 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6532 } else { 6533 oldranks = NULL; 6534 } 6535 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6536 if (aggregate) { /* TODO: all this part could be made more efficient */ 6537 PetscInt lrows,row,ncols,*cols; 6538 PetscMPIInt nrank; 6539 PetscScalar *vals; 6540 6541 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6542 lrows = 0; 6543 if (nrank<redprocs) { 6544 lrows = size/redprocs; 6545 if (nrank<size%redprocs) lrows++; 6546 } 6547 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6548 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6549 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6550 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6551 row = nrank; 6552 ncols = xadj[1]-xadj[0]; 6553 cols = adjncy; 6554 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6555 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6556 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6557 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6558 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6559 ierr = PetscFree(xadj);CHKERRQ(ierr); 6560 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6561 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6562 ierr = PetscFree(vals);CHKERRQ(ierr); 6563 if (use_vwgt) { 6564 Vec v; 6565 const PetscScalar *array; 6566 PetscInt nl; 6567 6568 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6569 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6570 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6571 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6572 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6573 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6574 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6575 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6576 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6577 ierr = VecDestroy(&v);CHKERRQ(ierr); 6578 } 6579 } else { 6580 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6581 if (use_vwgt) { 6582 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6583 v_wgt[0] = n; 6584 } 6585 } 6586 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6587 6588 /* Partition */ 6589 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6590 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6591 if (v_wgt) { 6592 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6593 } 6594 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6595 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6596 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6597 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6598 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6599 6600 /* renumber new_ranks to avoid "holes" in new set of processors */ 6601 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6602 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6603 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6604 if (!aggregate) { 6605 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6606 #if defined(PETSC_USE_DEBUG) 6607 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6608 #endif 6609 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6610 } else if (oldranks) { 6611 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6612 } else { 6613 ranks_send_to_idx[0] = is_indices[0]; 6614 } 6615 } else { 6616 PetscInt idxs[1]; 6617 PetscMPIInt tag; 6618 MPI_Request *reqs; 6619 6620 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6621 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6622 for (i=rstart;i<rend;i++) { 6623 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6624 } 6625 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6626 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6627 ierr = PetscFree(reqs);CHKERRQ(ierr); 6628 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6629 #if defined(PETSC_USE_DEBUG) 6630 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6631 #endif 6632 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6633 } else if (oldranks) { 6634 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6635 } else { 6636 ranks_send_to_idx[0] = idxs[0]; 6637 } 6638 } 6639 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6640 /* clean up */ 6641 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6642 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6643 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6644 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6645 } 6646 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6647 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6648 6649 /* assemble parallel IS for sends */ 6650 i = 1; 6651 if (!color) i=0; 6652 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6653 PetscFunctionReturn(0); 6654 } 6655 6656 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6657 6658 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6659 { 6660 Mat local_mat; 6661 IS is_sends_internal; 6662 PetscInt rows,cols,new_local_rows; 6663 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6664 PetscBool ismatis,isdense,newisdense,destroy_mat; 6665 ISLocalToGlobalMapping l2gmap; 6666 PetscInt* l2gmap_indices; 6667 const PetscInt* is_indices; 6668 MatType new_local_type; 6669 /* buffers */ 6670 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6671 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6672 PetscInt *recv_buffer_idxs_local; 6673 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6674 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6675 /* MPI */ 6676 MPI_Comm comm,comm_n; 6677 PetscSubcomm subcomm; 6678 PetscMPIInt n_sends,n_recvs,commsize; 6679 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6680 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6681 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6682 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6683 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6684 PetscErrorCode ierr; 6685 6686 PetscFunctionBegin; 6687 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6688 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6689 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6690 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6691 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6692 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6693 PetscValidLogicalCollectiveBool(mat,reuse,6); 6694 PetscValidLogicalCollectiveInt(mat,nis,8); 6695 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6696 if (nvecs) { 6697 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6698 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6699 } 6700 /* further checks */ 6701 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6702 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6703 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6704 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6705 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6706 if (reuse && *mat_n) { 6707 PetscInt mrows,mcols,mnrows,mncols; 6708 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6709 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6710 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6711 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6712 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6713 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6714 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6715 } 6716 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6717 PetscValidLogicalCollectiveInt(mat,bs,0); 6718 6719 /* prepare IS for sending if not provided */ 6720 if (!is_sends) { 6721 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6722 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6723 } else { 6724 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6725 is_sends_internal = is_sends; 6726 } 6727 6728 /* get comm */ 6729 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6730 6731 /* compute number of sends */ 6732 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6733 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6734 6735 /* compute number of receives */ 6736 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6737 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6738 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6739 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6740 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6741 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6742 ierr = PetscFree(iflags);CHKERRQ(ierr); 6743 6744 /* restrict comm if requested */ 6745 subcomm = 0; 6746 destroy_mat = PETSC_FALSE; 6747 if (restrict_comm) { 6748 PetscMPIInt color,subcommsize; 6749 6750 color = 0; 6751 if (restrict_full) { 6752 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6753 } else { 6754 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6755 } 6756 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6757 subcommsize = commsize - subcommsize; 6758 /* check if reuse has been requested */ 6759 if (reuse) { 6760 if (*mat_n) { 6761 PetscMPIInt subcommsize2; 6762 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6763 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6764 comm_n = PetscObjectComm((PetscObject)*mat_n); 6765 } else { 6766 comm_n = PETSC_COMM_SELF; 6767 } 6768 } else { /* MAT_INITIAL_MATRIX */ 6769 PetscMPIInt rank; 6770 6771 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6772 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6773 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6774 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6775 comm_n = PetscSubcommChild(subcomm); 6776 } 6777 /* flag to destroy *mat_n if not significative */ 6778 if (color) destroy_mat = PETSC_TRUE; 6779 } else { 6780 comm_n = comm; 6781 } 6782 6783 /* prepare send/receive buffers */ 6784 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6785 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6786 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6787 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6788 if (nis) { 6789 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6790 } 6791 6792 /* Get data from local matrices */ 6793 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6794 /* TODO: See below some guidelines on how to prepare the local buffers */ 6795 /* 6796 send_buffer_vals should contain the raw values of the local matrix 6797 send_buffer_idxs should contain: 6798 - MatType_PRIVATE type 6799 - PetscInt size_of_l2gmap 6800 - PetscInt global_row_indices[size_of_l2gmap] 6801 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6802 */ 6803 else { 6804 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6805 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6806 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6807 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6808 send_buffer_idxs[1] = i; 6809 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6810 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6811 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6812 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6813 for (i=0;i<n_sends;i++) { 6814 ilengths_vals[is_indices[i]] = len*len; 6815 ilengths_idxs[is_indices[i]] = len+2; 6816 } 6817 } 6818 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6819 /* additional is (if any) */ 6820 if (nis) { 6821 PetscMPIInt psum; 6822 PetscInt j; 6823 for (j=0,psum=0;j<nis;j++) { 6824 PetscInt plen; 6825 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6826 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6827 psum += len+1; /* indices + lenght */ 6828 } 6829 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6830 for (j=0,psum=0;j<nis;j++) { 6831 PetscInt plen; 6832 const PetscInt *is_array_idxs; 6833 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6834 send_buffer_idxs_is[psum] = plen; 6835 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6836 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6837 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6838 psum += plen+1; /* indices + lenght */ 6839 } 6840 for (i=0;i<n_sends;i++) { 6841 ilengths_idxs_is[is_indices[i]] = psum; 6842 } 6843 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6844 } 6845 6846 buf_size_idxs = 0; 6847 buf_size_vals = 0; 6848 buf_size_idxs_is = 0; 6849 buf_size_vecs = 0; 6850 for (i=0;i<n_recvs;i++) { 6851 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6852 buf_size_vals += (PetscInt)olengths_vals[i]; 6853 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6854 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6855 } 6856 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6857 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6858 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6859 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6860 6861 /* get new tags for clean communications */ 6862 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6863 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6864 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6865 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6866 6867 /* allocate for requests */ 6868 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6869 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6870 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6871 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6872 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6873 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6874 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6875 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6876 6877 /* communications */ 6878 ptr_idxs = recv_buffer_idxs; 6879 ptr_vals = recv_buffer_vals; 6880 ptr_idxs_is = recv_buffer_idxs_is; 6881 ptr_vecs = recv_buffer_vecs; 6882 for (i=0;i<n_recvs;i++) { 6883 source_dest = onodes[i]; 6884 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6885 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6886 ptr_idxs += olengths_idxs[i]; 6887 ptr_vals += olengths_vals[i]; 6888 if (nis) { 6889 source_dest = onodes_is[i]; 6890 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6891 ptr_idxs_is += olengths_idxs_is[i]; 6892 } 6893 if (nvecs) { 6894 source_dest = onodes[i]; 6895 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6896 ptr_vecs += olengths_idxs[i]-2; 6897 } 6898 } 6899 for (i=0;i<n_sends;i++) { 6900 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6901 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6902 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6903 if (nis) { 6904 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6905 } 6906 if (nvecs) { 6907 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6908 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6909 } 6910 } 6911 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6912 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6913 6914 /* assemble new l2g map */ 6915 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6916 ptr_idxs = recv_buffer_idxs; 6917 new_local_rows = 0; 6918 for (i=0;i<n_recvs;i++) { 6919 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6920 ptr_idxs += olengths_idxs[i]; 6921 } 6922 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6923 ptr_idxs = recv_buffer_idxs; 6924 new_local_rows = 0; 6925 for (i=0;i<n_recvs;i++) { 6926 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6927 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6928 ptr_idxs += olengths_idxs[i]; 6929 } 6930 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6931 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6932 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6933 6934 /* infer new local matrix type from received local matrices type */ 6935 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6936 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6937 if (n_recvs) { 6938 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6939 ptr_idxs = recv_buffer_idxs; 6940 for (i=0;i<n_recvs;i++) { 6941 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6942 new_local_type_private = MATAIJ_PRIVATE; 6943 break; 6944 } 6945 ptr_idxs += olengths_idxs[i]; 6946 } 6947 switch (new_local_type_private) { 6948 case MATDENSE_PRIVATE: 6949 new_local_type = MATSEQAIJ; 6950 bs = 1; 6951 break; 6952 case MATAIJ_PRIVATE: 6953 new_local_type = MATSEQAIJ; 6954 bs = 1; 6955 break; 6956 case MATBAIJ_PRIVATE: 6957 new_local_type = MATSEQBAIJ; 6958 break; 6959 case MATSBAIJ_PRIVATE: 6960 new_local_type = MATSEQSBAIJ; 6961 break; 6962 default: 6963 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 6964 break; 6965 } 6966 } else { /* by default, new_local_type is seqaij */ 6967 new_local_type = MATSEQAIJ; 6968 bs = 1; 6969 } 6970 6971 /* create MATIS object if needed */ 6972 if (!reuse) { 6973 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6974 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6975 } else { 6976 /* it also destroys the local matrices */ 6977 if (*mat_n) { 6978 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6979 } else { /* this is a fake object */ 6980 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6981 } 6982 } 6983 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6984 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6985 6986 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6987 6988 /* Global to local map of received indices */ 6989 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6990 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6991 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6992 6993 /* restore attributes -> type of incoming data and its size */ 6994 buf_size_idxs = 0; 6995 for (i=0;i<n_recvs;i++) { 6996 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6997 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6998 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6999 } 7000 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7001 7002 /* set preallocation */ 7003 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7004 if (!newisdense) { 7005 PetscInt *new_local_nnz=0; 7006 7007 ptr_idxs = recv_buffer_idxs_local; 7008 if (n_recvs) { 7009 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7010 } 7011 for (i=0;i<n_recvs;i++) { 7012 PetscInt j; 7013 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7014 for (j=0;j<*(ptr_idxs+1);j++) { 7015 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7016 } 7017 } else { 7018 /* TODO */ 7019 } 7020 ptr_idxs += olengths_idxs[i]; 7021 } 7022 if (new_local_nnz) { 7023 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7024 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7025 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7026 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7027 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7028 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7029 } else { 7030 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7031 } 7032 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7033 } else { 7034 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7035 } 7036 7037 /* set values */ 7038 ptr_vals = recv_buffer_vals; 7039 ptr_idxs = recv_buffer_idxs_local; 7040 for (i=0;i<n_recvs;i++) { 7041 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7042 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7043 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7044 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7045 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7046 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7047 } else { 7048 /* TODO */ 7049 } 7050 ptr_idxs += olengths_idxs[i]; 7051 ptr_vals += olengths_vals[i]; 7052 } 7053 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7054 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7055 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7056 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7057 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7058 7059 #if 0 7060 if (!restrict_comm) { /* check */ 7061 Vec lvec,rvec; 7062 PetscReal infty_error; 7063 7064 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7065 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7066 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7067 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7068 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7069 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7070 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7071 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7072 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7073 } 7074 #endif 7075 7076 /* assemble new additional is (if any) */ 7077 if (nis) { 7078 PetscInt **temp_idxs,*count_is,j,psum; 7079 7080 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7081 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7082 ptr_idxs = recv_buffer_idxs_is; 7083 psum = 0; 7084 for (i=0;i<n_recvs;i++) { 7085 for (j=0;j<nis;j++) { 7086 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7087 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7088 psum += plen; 7089 ptr_idxs += plen+1; /* shift pointer to received data */ 7090 } 7091 } 7092 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7093 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7094 for (i=1;i<nis;i++) { 7095 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7096 } 7097 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7098 ptr_idxs = recv_buffer_idxs_is; 7099 for (i=0;i<n_recvs;i++) { 7100 for (j=0;j<nis;j++) { 7101 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7102 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7103 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7104 ptr_idxs += plen+1; /* shift pointer to received data */ 7105 } 7106 } 7107 for (i=0;i<nis;i++) { 7108 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7109 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7110 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7111 } 7112 ierr = PetscFree(count_is);CHKERRQ(ierr); 7113 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7114 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7115 } 7116 /* free workspace */ 7117 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7118 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7119 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7120 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7121 if (isdense) { 7122 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7123 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7124 } else { 7125 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7126 } 7127 if (nis) { 7128 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7129 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7130 } 7131 7132 if (nvecs) { 7133 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7134 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7135 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7136 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7137 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7138 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7139 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7140 /* set values */ 7141 ptr_vals = recv_buffer_vecs; 7142 ptr_idxs = recv_buffer_idxs_local; 7143 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7144 for (i=0;i<n_recvs;i++) { 7145 PetscInt j; 7146 for (j=0;j<*(ptr_idxs+1);j++) { 7147 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7148 } 7149 ptr_idxs += olengths_idxs[i]; 7150 ptr_vals += olengths_idxs[i]-2; 7151 } 7152 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7153 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7154 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7155 } 7156 7157 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7158 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7159 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7160 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7161 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7162 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7163 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7164 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7165 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7166 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7167 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7168 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7169 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7170 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7171 ierr = PetscFree(onodes);CHKERRQ(ierr); 7172 if (nis) { 7173 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7174 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7175 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7176 } 7177 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7178 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7179 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7180 for (i=0;i<nis;i++) { 7181 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7182 } 7183 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7184 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7185 } 7186 *mat_n = NULL; 7187 } 7188 PetscFunctionReturn(0); 7189 } 7190 7191 /* temporary hack into ksp private data structure */ 7192 #include <petsc/private/kspimpl.h> 7193 7194 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7195 { 7196 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7197 PC_IS *pcis = (PC_IS*)pc->data; 7198 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7199 Mat coarsedivudotp = NULL; 7200 Mat coarseG,t_coarse_mat_is; 7201 MatNullSpace CoarseNullSpace = NULL; 7202 ISLocalToGlobalMapping coarse_islg; 7203 IS coarse_is,*isarray; 7204 PetscInt i,im_active=-1,active_procs=-1; 7205 PetscInt nis,nisdofs,nisneu,nisvert; 7206 PC pc_temp; 7207 PCType coarse_pc_type; 7208 KSPType coarse_ksp_type; 7209 PetscBool multilevel_requested,multilevel_allowed; 7210 PetscBool isredundant,isbddc,isnn,coarse_reuse; 7211 PetscInt ncoarse,nedcfield; 7212 PetscBool compute_vecs = PETSC_FALSE; 7213 PetscScalar *array; 7214 MatReuse coarse_mat_reuse; 7215 PetscBool restr, full_restr, have_void; 7216 PetscMPIInt commsize; 7217 PetscErrorCode ierr; 7218 7219 PetscFunctionBegin; 7220 /* Assign global numbering to coarse dofs */ 7221 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7222 PetscInt ocoarse_size; 7223 compute_vecs = PETSC_TRUE; 7224 7225 pcbddc->new_primal_space = PETSC_TRUE; 7226 ocoarse_size = pcbddc->coarse_size; 7227 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7228 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7229 /* see if we can avoid some work */ 7230 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7231 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7232 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7233 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7234 coarse_reuse = PETSC_FALSE; 7235 } else { /* we can safely reuse already computed coarse matrix */ 7236 coarse_reuse = PETSC_TRUE; 7237 } 7238 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7239 coarse_reuse = PETSC_FALSE; 7240 } 7241 /* reset any subassembling information */ 7242 if (!coarse_reuse || pcbddc->recompute_topography) { 7243 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7244 } 7245 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7246 coarse_reuse = PETSC_TRUE; 7247 } 7248 /* assemble coarse matrix */ 7249 if (coarse_reuse && pcbddc->coarse_ksp) { 7250 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7251 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7252 coarse_mat_reuse = MAT_REUSE_MATRIX; 7253 } else { 7254 coarse_mat = NULL; 7255 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7256 } 7257 7258 /* creates temporary l2gmap and IS for coarse indexes */ 7259 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7260 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7261 7262 /* creates temporary MATIS object for coarse matrix */ 7263 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7264 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7265 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7266 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7267 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7268 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7269 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7270 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7271 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7272 7273 /* count "active" (i.e. with positive local size) and "void" processes */ 7274 im_active = !!(pcis->n); 7275 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7276 7277 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7278 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7279 /* full_restr : just use the receivers from the subassembling pattern */ 7280 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7281 coarse_mat_is = NULL; 7282 multilevel_allowed = PETSC_FALSE; 7283 multilevel_requested = PETSC_FALSE; 7284 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7285 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7286 if (multilevel_requested) { 7287 ncoarse = active_procs/pcbddc->coarsening_ratio; 7288 restr = PETSC_FALSE; 7289 full_restr = PETSC_FALSE; 7290 } else { 7291 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7292 restr = PETSC_TRUE; 7293 full_restr = PETSC_TRUE; 7294 } 7295 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7296 ncoarse = PetscMax(1,ncoarse); 7297 if (!pcbddc->coarse_subassembling) { 7298 if (pcbddc->coarsening_ratio > 1) { 7299 if (multilevel_requested) { 7300 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7301 } else { 7302 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7303 } 7304 } else { 7305 PetscMPIInt rank; 7306 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7307 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7308 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7309 } 7310 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7311 PetscInt psum; 7312 if (pcbddc->coarse_ksp) psum = 1; 7313 else psum = 0; 7314 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7315 if (ncoarse < commsize) have_void = PETSC_TRUE; 7316 } 7317 /* determine if we can go multilevel */ 7318 if (multilevel_requested) { 7319 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7320 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7321 } 7322 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7323 7324 /* dump subassembling pattern */ 7325 if (pcbddc->dbg_flag && multilevel_allowed) { 7326 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7327 } 7328 7329 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7330 nedcfield = -1; 7331 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7332 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7333 const PetscInt *idxs; 7334 ISLocalToGlobalMapping tmap; 7335 7336 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7337 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7338 /* allocate space for temporary storage */ 7339 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7340 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7341 /* allocate for IS array */ 7342 nisdofs = pcbddc->n_ISForDofsLocal; 7343 if (pcbddc->nedclocal) { 7344 if (pcbddc->nedfield > -1) { 7345 nedcfield = pcbddc->nedfield; 7346 } else { 7347 nedcfield = 0; 7348 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7349 nisdofs = 1; 7350 } 7351 } 7352 nisneu = !!pcbddc->NeumannBoundariesLocal; 7353 nisvert = 0; /* nisvert is not used */ 7354 nis = nisdofs + nisneu + nisvert; 7355 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7356 /* dofs splitting */ 7357 for (i=0;i<nisdofs;i++) { 7358 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7359 if (nedcfield != i) { 7360 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7361 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7362 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7363 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7364 } else { 7365 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7366 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7367 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7368 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7369 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7370 } 7371 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7372 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7373 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7374 } 7375 /* neumann boundaries */ 7376 if (pcbddc->NeumannBoundariesLocal) { 7377 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7378 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7379 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7380 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7381 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7382 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7383 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7384 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7385 } 7386 /* free memory */ 7387 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7388 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7389 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7390 } else { 7391 nis = 0; 7392 nisdofs = 0; 7393 nisneu = 0; 7394 nisvert = 0; 7395 isarray = NULL; 7396 } 7397 /* destroy no longer needed map */ 7398 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7399 7400 /* subassemble */ 7401 if (multilevel_allowed) { 7402 Vec vp[1]; 7403 PetscInt nvecs = 0; 7404 PetscBool reuse,reuser; 7405 7406 if (coarse_mat) reuse = PETSC_TRUE; 7407 else reuse = PETSC_FALSE; 7408 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7409 vp[0] = NULL; 7410 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7411 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7412 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7413 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7414 nvecs = 1; 7415 7416 if (pcbddc->divudotp) { 7417 Mat B,loc_divudotp; 7418 Vec v,p; 7419 IS dummy; 7420 PetscInt np; 7421 7422 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7423 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7424 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7425 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7426 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7427 ierr = VecSet(p,1.);CHKERRQ(ierr); 7428 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7429 ierr = VecDestroy(&p);CHKERRQ(ierr); 7430 ierr = MatDestroy(&B);CHKERRQ(ierr); 7431 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7432 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7433 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7434 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7435 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7436 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7437 ierr = VecDestroy(&v);CHKERRQ(ierr); 7438 } 7439 } 7440 if (reuser) { 7441 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7442 } else { 7443 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7444 } 7445 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7446 PetscScalar *arraym,*arrayv; 7447 PetscInt nl; 7448 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7449 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7450 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7451 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7452 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7453 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7454 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7455 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7456 } else { 7457 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7458 } 7459 } else { 7460 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7461 } 7462 if (coarse_mat_is || coarse_mat) { 7463 PetscMPIInt size; 7464 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7465 if (!multilevel_allowed) { 7466 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7467 } else { 7468 Mat A; 7469 7470 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7471 if (coarse_mat_is) { 7472 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7473 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7474 coarse_mat = coarse_mat_is; 7475 } 7476 /* be sure we don't have MatSeqDENSE as local mat */ 7477 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7478 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7479 } 7480 } 7481 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7482 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7483 7484 /* create local to global scatters for coarse problem */ 7485 if (compute_vecs) { 7486 PetscInt lrows; 7487 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7488 if (coarse_mat) { 7489 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7490 } else { 7491 lrows = 0; 7492 } 7493 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7494 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7495 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7496 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7497 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7498 } 7499 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7500 7501 /* set defaults for coarse KSP and PC */ 7502 if (multilevel_allowed) { 7503 coarse_ksp_type = KSPRICHARDSON; 7504 coarse_pc_type = PCBDDC; 7505 } else { 7506 coarse_ksp_type = KSPPREONLY; 7507 coarse_pc_type = PCREDUNDANT; 7508 } 7509 7510 /* print some info if requested */ 7511 if (pcbddc->dbg_flag) { 7512 if (!multilevel_allowed) { 7513 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7514 if (multilevel_requested) { 7515 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7516 } else if (pcbddc->max_levels) { 7517 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7518 } 7519 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7520 } 7521 } 7522 7523 /* communicate coarse discrete gradient */ 7524 coarseG = NULL; 7525 if (pcbddc->nedcG && multilevel_allowed) { 7526 MPI_Comm ccomm; 7527 if (coarse_mat) { 7528 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7529 } else { 7530 ccomm = MPI_COMM_NULL; 7531 } 7532 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7533 } 7534 7535 /* create the coarse KSP object only once with defaults */ 7536 if (coarse_mat) { 7537 PetscViewer dbg_viewer = NULL; 7538 if (pcbddc->dbg_flag) { 7539 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7540 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7541 } 7542 if (!pcbddc->coarse_ksp) { 7543 char prefix[256],str_level[16]; 7544 size_t len; 7545 7546 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7547 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7548 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7549 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7550 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7551 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7552 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7553 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7554 /* TODO is this logic correct? should check for coarse_mat type */ 7555 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7556 /* prefix */ 7557 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7558 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7559 if (!pcbddc->current_level) { 7560 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7561 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7562 } else { 7563 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7564 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7565 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7566 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7567 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7568 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7569 } 7570 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7571 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7572 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7573 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7574 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7575 /* allow user customization */ 7576 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7577 } 7578 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7579 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7580 if (nisdofs) { 7581 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7582 for (i=0;i<nisdofs;i++) { 7583 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7584 } 7585 } 7586 if (nisneu) { 7587 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7588 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7589 } 7590 if (nisvert) { 7591 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7592 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7593 } 7594 if (coarseG) { 7595 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7596 } 7597 7598 /* get some info after set from options */ 7599 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7600 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7601 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7602 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 7603 if (isbddc && !multilevel_allowed) { 7604 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7605 isbddc = PETSC_FALSE; 7606 } 7607 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 7608 if (multilevel_requested && !isbddc && !isnn) { 7609 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 7610 isbddc = PETSC_TRUE; 7611 isnn = PETSC_FALSE; 7612 } 7613 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7614 if (isredundant) { 7615 KSP inner_ksp; 7616 PC inner_pc; 7617 7618 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7619 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7620 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7621 } 7622 7623 /* parameters which miss an API */ 7624 if (isbddc) { 7625 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7626 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7627 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7628 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7629 if (pcbddc_coarse->benign_saddle_point) { 7630 Mat coarsedivudotp_is; 7631 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7632 IS row,col; 7633 const PetscInt *gidxs; 7634 PetscInt n,st,M,N; 7635 7636 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7637 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7638 st = st-n; 7639 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7640 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7641 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7642 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7643 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7644 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7645 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7646 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7647 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7648 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7649 ierr = ISDestroy(&row);CHKERRQ(ierr); 7650 ierr = ISDestroy(&col);CHKERRQ(ierr); 7651 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7652 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7653 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7654 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7655 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7656 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7657 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7658 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7659 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7660 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7661 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7662 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7663 } 7664 } 7665 7666 /* propagate symmetry info of coarse matrix */ 7667 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7668 if (pc->pmat->symmetric_set) { 7669 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7670 } 7671 if (pc->pmat->hermitian_set) { 7672 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7673 } 7674 if (pc->pmat->spd_set) { 7675 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7676 } 7677 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7678 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7679 } 7680 /* set operators */ 7681 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7682 if (pcbddc->dbg_flag) { 7683 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7684 } 7685 } 7686 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 7687 ierr = PetscFree(isarray);CHKERRQ(ierr); 7688 #if 0 7689 { 7690 PetscViewer viewer; 7691 char filename[256]; 7692 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7693 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7694 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7695 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7696 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7697 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7698 } 7699 #endif 7700 7701 if (pcbddc->coarse_ksp) { 7702 Vec crhs,csol; 7703 7704 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7705 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7706 if (!csol) { 7707 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7708 } 7709 if (!crhs) { 7710 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7711 } 7712 } 7713 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7714 7715 /* compute null space for coarse solver if the benign trick has been requested */ 7716 if (pcbddc->benign_null) { 7717 7718 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7719 for (i=0;i<pcbddc->benign_n;i++) { 7720 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7721 } 7722 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7723 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7724 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7725 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7726 if (coarse_mat) { 7727 Vec nullv; 7728 PetscScalar *array,*array2; 7729 PetscInt nl; 7730 7731 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7732 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7733 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7734 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7735 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7736 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7737 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7738 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7739 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7740 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7741 } 7742 } 7743 7744 if (pcbddc->coarse_ksp) { 7745 PetscBool ispreonly; 7746 7747 if (CoarseNullSpace) { 7748 PetscBool isnull; 7749 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7750 if (isnull) { 7751 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7752 } 7753 /* TODO: add local nullspaces (if any) */ 7754 } 7755 /* setup coarse ksp */ 7756 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7757 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7758 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7759 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7760 KSP check_ksp; 7761 KSPType check_ksp_type; 7762 PC check_pc; 7763 Vec check_vec,coarse_vec; 7764 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7765 PetscInt its; 7766 PetscBool compute_eigs; 7767 PetscReal *eigs_r,*eigs_c; 7768 PetscInt neigs; 7769 const char *prefix; 7770 7771 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7772 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7773 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7774 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7775 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7776 /* prevent from setup unneeded object */ 7777 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7778 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7779 if (ispreonly) { 7780 check_ksp_type = KSPPREONLY; 7781 compute_eigs = PETSC_FALSE; 7782 } else { 7783 check_ksp_type = KSPGMRES; 7784 compute_eigs = PETSC_TRUE; 7785 } 7786 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7787 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7788 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7789 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7790 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7791 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7792 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7793 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7794 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7795 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7796 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7797 /* create random vec */ 7798 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7799 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7800 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7801 /* solve coarse problem */ 7802 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7803 /* set eigenvalue estimation if preonly has not been requested */ 7804 if (compute_eigs) { 7805 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7806 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7807 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7808 if (neigs) { 7809 lambda_max = eigs_r[neigs-1]; 7810 lambda_min = eigs_r[0]; 7811 if (pcbddc->use_coarse_estimates) { 7812 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7813 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7814 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7815 } 7816 } 7817 } 7818 } 7819 7820 /* check coarse problem residual error */ 7821 if (pcbddc->dbg_flag) { 7822 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7823 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7824 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7825 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7826 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7827 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7828 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7829 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7830 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7831 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7832 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7833 if (CoarseNullSpace) { 7834 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7835 } 7836 if (compute_eigs) { 7837 PetscReal lambda_max_s,lambda_min_s; 7838 KSPConvergedReason reason; 7839 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7840 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7841 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 7842 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7843 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7844 for (i=0;i<neigs;i++) { 7845 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7846 } 7847 } 7848 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7849 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7850 } 7851 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7852 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7853 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7854 if (compute_eigs) { 7855 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7856 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7857 } 7858 } 7859 } 7860 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7861 /* print additional info */ 7862 if (pcbddc->dbg_flag) { 7863 /* waits until all processes reaches this point */ 7864 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7865 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7866 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7867 } 7868 7869 /* free memory */ 7870 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7871 PetscFunctionReturn(0); 7872 } 7873 7874 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7875 { 7876 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7877 PC_IS* pcis = (PC_IS*)pc->data; 7878 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7879 IS subset,subset_mult,subset_n; 7880 PetscInt local_size,coarse_size=0; 7881 PetscInt *local_primal_indices=NULL; 7882 const PetscInt *t_local_primal_indices; 7883 PetscErrorCode ierr; 7884 7885 PetscFunctionBegin; 7886 /* Compute global number of coarse dofs */ 7887 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7888 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7889 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7890 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7891 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7892 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7893 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7894 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7895 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7896 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7897 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7898 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7899 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7900 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7901 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7902 7903 /* check numbering */ 7904 if (pcbddc->dbg_flag) { 7905 PetscScalar coarsesum,*array,*array2; 7906 PetscInt i; 7907 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7908 7909 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7910 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7911 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7912 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7913 /* counter */ 7914 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7915 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7916 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7917 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7918 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7919 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7920 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7921 for (i=0;i<pcbddc->local_primal_size;i++) { 7922 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7923 } 7924 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7925 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7926 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7927 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7928 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7929 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7930 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7931 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7932 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7933 for (i=0;i<pcis->n;i++) { 7934 if (array[i] != 0.0 && array[i] != array2[i]) { 7935 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7936 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7937 set_error = PETSC_TRUE; 7938 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7939 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7940 } 7941 } 7942 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7943 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7944 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7945 for (i=0;i<pcis->n;i++) { 7946 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7947 } 7948 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7949 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7950 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7951 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7952 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7953 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7954 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7955 PetscInt *gidxs; 7956 7957 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7958 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7959 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7960 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7961 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7962 for (i=0;i<pcbddc->local_primal_size;i++) { 7963 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7964 } 7965 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7966 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7967 } 7968 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7969 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7970 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7971 } 7972 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7973 /* get back data */ 7974 *coarse_size_n = coarse_size; 7975 *local_primal_indices_n = local_primal_indices; 7976 PetscFunctionReturn(0); 7977 } 7978 7979 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7980 { 7981 IS localis_t; 7982 PetscInt i,lsize,*idxs,n; 7983 PetscScalar *vals; 7984 PetscErrorCode ierr; 7985 7986 PetscFunctionBegin; 7987 /* get indices in local ordering exploiting local to global map */ 7988 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7989 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7990 for (i=0;i<lsize;i++) vals[i] = 1.0; 7991 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7992 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7993 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7994 if (idxs) { /* multilevel guard */ 7995 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7996 } 7997 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7998 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7999 ierr = PetscFree(vals);CHKERRQ(ierr); 8000 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8001 /* now compute set in local ordering */ 8002 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8003 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8004 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8005 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8006 for (i=0,lsize=0;i<n;i++) { 8007 if (PetscRealPart(vals[i]) > 0.5) { 8008 lsize++; 8009 } 8010 } 8011 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8012 for (i=0,lsize=0;i<n;i++) { 8013 if (PetscRealPart(vals[i]) > 0.5) { 8014 idxs[lsize++] = i; 8015 } 8016 } 8017 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8018 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8019 *localis = localis_t; 8020 PetscFunctionReturn(0); 8021 } 8022 8023 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8024 { 8025 PC_IS *pcis=(PC_IS*)pc->data; 8026 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8027 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8028 Mat S_j; 8029 PetscInt *used_xadj,*used_adjncy; 8030 PetscBool free_used_adj; 8031 PetscErrorCode ierr; 8032 8033 PetscFunctionBegin; 8034 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8035 free_used_adj = PETSC_FALSE; 8036 if (pcbddc->sub_schurs_layers == -1) { 8037 used_xadj = NULL; 8038 used_adjncy = NULL; 8039 } else { 8040 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8041 used_xadj = pcbddc->mat_graph->xadj; 8042 used_adjncy = pcbddc->mat_graph->adjncy; 8043 } else if (pcbddc->computed_rowadj) { 8044 used_xadj = pcbddc->mat_graph->xadj; 8045 used_adjncy = pcbddc->mat_graph->adjncy; 8046 } else { 8047 PetscBool flg_row=PETSC_FALSE; 8048 const PetscInt *xadj,*adjncy; 8049 PetscInt nvtxs; 8050 8051 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8052 if (flg_row) { 8053 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8054 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8055 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8056 free_used_adj = PETSC_TRUE; 8057 } else { 8058 pcbddc->sub_schurs_layers = -1; 8059 used_xadj = NULL; 8060 used_adjncy = NULL; 8061 } 8062 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8063 } 8064 } 8065 8066 /* setup sub_schurs data */ 8067 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8068 if (!sub_schurs->schur_explicit) { 8069 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8070 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8071 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8072 } else { 8073 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8074 PetscBool isseqaij,need_change = PETSC_FALSE; 8075 PetscInt benign_n; 8076 Mat change = NULL; 8077 Vec scaling = NULL; 8078 IS change_primal = NULL; 8079 8080 if (!pcbddc->use_vertices && reuse_solvers) { 8081 PetscInt n_vertices; 8082 8083 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8084 reuse_solvers = (PetscBool)!n_vertices; 8085 } 8086 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8087 if (!isseqaij) { 8088 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8089 if (matis->A == pcbddc->local_mat) { 8090 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8091 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8092 } else { 8093 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8094 } 8095 } 8096 if (!pcbddc->benign_change_explicit) { 8097 benign_n = pcbddc->benign_n; 8098 } else { 8099 benign_n = 0; 8100 } 8101 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8102 We need a global reduction to avoid possible deadlocks. 8103 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8104 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8105 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8106 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8107 need_change = (PetscBool)(!need_change); 8108 } 8109 /* If the user defines additional constraints, we import them here. 8110 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8111 if (need_change) { 8112 PC_IS *pcisf; 8113 PC_BDDC *pcbddcf; 8114 PC pcf; 8115 8116 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8117 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8118 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8119 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8120 8121 /* hacks */ 8122 pcisf = (PC_IS*)pcf->data; 8123 pcisf->is_B_local = pcis->is_B_local; 8124 pcisf->vec1_N = pcis->vec1_N; 8125 pcisf->BtoNmap = pcis->BtoNmap; 8126 pcisf->n = pcis->n; 8127 pcisf->n_B = pcis->n_B; 8128 pcbddcf = (PC_BDDC*)pcf->data; 8129 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8130 pcbddcf->mat_graph = pcbddc->mat_graph; 8131 pcbddcf->use_faces = PETSC_TRUE; 8132 pcbddcf->use_change_of_basis = PETSC_TRUE; 8133 pcbddcf->use_change_on_faces = PETSC_TRUE; 8134 pcbddcf->use_qr_single = PETSC_TRUE; 8135 pcbddcf->fake_change = PETSC_TRUE; 8136 8137 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8138 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8139 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8140 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8141 change = pcbddcf->ConstraintMatrix; 8142 pcbddcf->ConstraintMatrix = NULL; 8143 8144 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8145 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8146 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8147 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8148 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8149 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8150 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8151 pcf->ops->destroy = NULL; 8152 pcf->ops->reset = NULL; 8153 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8154 } 8155 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8156 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8157 ierr = MatDestroy(&change);CHKERRQ(ierr); 8158 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8159 } 8160 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8161 8162 /* free adjacency */ 8163 if (free_used_adj) { 8164 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8165 } 8166 PetscFunctionReturn(0); 8167 } 8168 8169 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8170 { 8171 PC_IS *pcis=(PC_IS*)pc->data; 8172 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8173 PCBDDCGraph graph; 8174 PetscErrorCode ierr; 8175 8176 PetscFunctionBegin; 8177 /* attach interface graph for determining subsets */ 8178 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8179 IS verticesIS,verticescomm; 8180 PetscInt vsize,*idxs; 8181 8182 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8183 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8184 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8185 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8186 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8187 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8188 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8189 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8190 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8191 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8192 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8193 } else { 8194 graph = pcbddc->mat_graph; 8195 } 8196 /* print some info */ 8197 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8198 IS vertices; 8199 PetscInt nv,nedges,nfaces; 8200 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8201 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8202 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8203 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8204 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8205 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8206 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8207 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8208 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8209 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8210 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8211 } 8212 8213 /* sub_schurs init */ 8214 if (!pcbddc->sub_schurs) { 8215 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8216 } 8217 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8218 pcbddc->sub_schurs->prefix = ((PetscObject)pc)->prefix; 8219 8220 /* free graph struct */ 8221 if (pcbddc->sub_schurs_rebuild) { 8222 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8223 } 8224 PetscFunctionReturn(0); 8225 } 8226 8227 PetscErrorCode PCBDDCCheckOperator(PC pc) 8228 { 8229 PC_IS *pcis=(PC_IS*)pc->data; 8230 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8231 PetscErrorCode ierr; 8232 8233 PetscFunctionBegin; 8234 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8235 IS zerodiag = NULL; 8236 Mat S_j,B0_B=NULL; 8237 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8238 PetscScalar *p0_check,*array,*array2; 8239 PetscReal norm; 8240 PetscInt i; 8241 8242 /* B0 and B0_B */ 8243 if (zerodiag) { 8244 IS dummy; 8245 8246 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8247 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8248 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8249 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8250 } 8251 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8252 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8253 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8254 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8255 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8256 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8257 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8258 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8259 /* S_j */ 8260 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8261 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8262 8263 /* mimic vector in \widetilde{W}_\Gamma */ 8264 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8265 /* continuous in primal space */ 8266 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8267 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8268 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8269 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8270 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8271 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8272 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8273 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8274 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8275 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8276 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8277 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8278 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8279 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8280 8281 /* assemble rhs for coarse problem */ 8282 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8283 /* local with Schur */ 8284 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8285 if (zerodiag) { 8286 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8287 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8288 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8289 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8290 } 8291 /* sum on primal nodes the local contributions */ 8292 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8293 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8294 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8295 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8296 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8297 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8298 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8299 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8300 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8301 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8302 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8303 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8304 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8305 /* scale primal nodes (BDDC sums contibutions) */ 8306 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8307 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8308 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8309 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8310 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8311 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8312 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8313 /* global: \widetilde{B0}_B w_\Gamma */ 8314 if (zerodiag) { 8315 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8316 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8317 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8318 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8319 } 8320 /* BDDC */ 8321 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8322 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8323 8324 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8325 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8326 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8327 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8328 for (i=0;i<pcbddc->benign_n;i++) { 8329 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8330 } 8331 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8332 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8333 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8334 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8335 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8336 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8337 } 8338 PetscFunctionReturn(0); 8339 } 8340 8341 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8342 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8343 { 8344 Mat At; 8345 IS rows; 8346 PetscInt rst,ren; 8347 PetscErrorCode ierr; 8348 PetscLayout rmap; 8349 8350 PetscFunctionBegin; 8351 rst = ren = 0; 8352 if (ccomm != MPI_COMM_NULL) { 8353 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8354 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8355 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8356 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8357 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8358 } 8359 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8360 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8361 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8362 8363 if (ccomm != MPI_COMM_NULL) { 8364 Mat_MPIAIJ *a,*b; 8365 IS from,to; 8366 Vec gvec; 8367 PetscInt lsize; 8368 8369 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8370 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8371 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8372 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8373 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8374 a = (Mat_MPIAIJ*)At->data; 8375 b = (Mat_MPIAIJ*)(*B)->data; 8376 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8377 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8378 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8379 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8380 b->A = a->A; 8381 b->B = a->B; 8382 8383 b->donotstash = a->donotstash; 8384 b->roworiented = a->roworiented; 8385 b->rowindices = 0; 8386 b->rowvalues = 0; 8387 b->getrowactive = PETSC_FALSE; 8388 8389 (*B)->rmap = rmap; 8390 (*B)->factortype = A->factortype; 8391 (*B)->assembled = PETSC_TRUE; 8392 (*B)->insertmode = NOT_SET_VALUES; 8393 (*B)->preallocated = PETSC_TRUE; 8394 8395 if (a->colmap) { 8396 #if defined(PETSC_USE_CTABLE) 8397 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8398 #else 8399 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8400 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8401 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8402 #endif 8403 } else b->colmap = 0; 8404 if (a->garray) { 8405 PetscInt len; 8406 len = a->B->cmap->n; 8407 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8408 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8409 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8410 } else b->garray = 0; 8411 8412 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8413 b->lvec = a->lvec; 8414 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8415 8416 /* cannot use VecScatterCopy */ 8417 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8418 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8419 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8420 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8421 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8422 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8423 ierr = ISDestroy(&from);CHKERRQ(ierr); 8424 ierr = ISDestroy(&to);CHKERRQ(ierr); 8425 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8426 } 8427 ierr = MatDestroy(&At);CHKERRQ(ierr); 8428 PetscFunctionReturn(0); 8429 } 8430