1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 #if !defined(PETSC_USE_COMPLEX) 18 PetscScalar *uwork,*data,*U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM,bN,lwork,lierr,di = 1; 21 PetscInt ulw,i,nr,nc,n; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 #if defined(PETSC_MISSING_LAPACK_GESVD) 26 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 27 #else 28 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 29 if (!nr || !nc) PetscFunctionReturn(0); 30 31 /* workspace */ 32 if (!work) { 33 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 34 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 35 } else { 36 ulw = lw; 37 uwork = work; 38 } 39 n = PetscMin(nr,nc); 40 if (!rwork) { 41 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 42 } else { 43 sing = rwork; 44 } 45 46 /* SVD */ 47 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 50 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 51 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 52 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 ierr = PetscFPTrapPop();CHKERRQ(ierr); 55 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 56 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 57 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 58 if (!rwork) { 59 ierr = PetscFree(sing);CHKERRQ(ierr); 60 } 61 if (!work) { 62 ierr = PetscFree(uwork);CHKERRQ(ierr); 63 } 64 /* create B */ 65 if (!range) { 66 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 67 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 68 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 69 } else { 70 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 71 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 73 } 74 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 75 ierr = PetscFree(U);CHKERRQ(ierr); 76 #endif 77 #else /* PETSC_USE_COMPLEX */ 78 PetscFunctionBegin; 79 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 80 #endif 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 PetscErrorCode PCBDDCNedelecSupport(PC pc) 156 { 157 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 158 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 159 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 160 Vec tvec; 161 PetscSF sfv; 162 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 163 MPI_Comm comm; 164 IS lned,primals,allprimals,nedfieldlocal; 165 IS *eedges,*extrows,*extcols,*alleedges; 166 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 167 PetscScalar *vals,*work; 168 PetscReal *rwork; 169 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 170 PetscInt ne,nv,Lv,order,n,field; 171 PetscInt n_neigh,*neigh,*n_shared,**shared; 172 PetscInt i,j,extmem,cum,maxsize,nee; 173 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 174 PetscInt *sfvleaves,*sfvroots; 175 PetscInt *corners,*cedges; 176 PetscInt *ecount,**eneighs,*vcount,**vneighs; 177 #if defined(PETSC_USE_DEBUG) 178 PetscInt *emarks; 179 #endif 180 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 /* If the discrete gradient is defined for a subset of dofs and global is true, 185 it assumes G is given in global ordering for all the dofs. 186 Otherwise, the ordering is global for the Nedelec field */ 187 order = pcbddc->nedorder; 188 conforming = pcbddc->conforming; 189 field = pcbddc->nedfield; 190 global = pcbddc->nedglobal; 191 setprimal = PETSC_FALSE; 192 print = PETSC_FALSE; 193 singular = PETSC_FALSE; 194 195 /* Command line customization */ 196 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 199 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 200 /* print debug info TODO: to be removed */ 201 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 202 ierr = PetscOptionsEnd();CHKERRQ(ierr); 203 204 /* Return if there are no edges in the decomposition and the problem is not singular */ 205 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 206 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 207 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 208 if (!singular) { 209 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 210 lrc[0] = PETSC_FALSE; 211 for (i=0;i<n;i++) { 212 if (PetscRealPart(vals[i]) > 2.) { 213 lrc[0] = PETSC_TRUE; 214 break; 215 } 216 } 217 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 218 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 219 if (!lrc[1]) PetscFunctionReturn(0); 220 } 221 222 /* Get Nedelec field */ 223 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 224 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 225 if (pcbddc->n_ISForDofsLocal && field >= 0) { 226 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 227 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 228 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 229 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 230 ne = n; 231 nedfieldlocal = NULL; 232 global = PETSC_TRUE; 233 } else if (field == PETSC_DECIDE) { 234 PetscInt rst,ren,*idx; 235 236 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 237 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 238 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 239 for (i=rst;i<ren;i++) { 240 PetscInt nc; 241 242 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 243 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 244 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 245 } 246 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 247 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 248 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 249 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 250 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 251 } else { 252 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 253 } 254 255 /* Sanity checks */ 256 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 257 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 258 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 259 260 /* Just set primal dofs and return */ 261 if (setprimal) { 262 IS enedfieldlocal; 263 PetscInt *eidxs; 264 265 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 266 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 267 if (nedfieldlocal) { 268 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 269 for (i=0,cum=0;i<ne;i++) { 270 if (PetscRealPart(vals[idxs[i]]) > 2.) { 271 eidxs[cum++] = idxs[i]; 272 } 273 } 274 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 275 } else { 276 for (i=0,cum=0;i<ne;i++) { 277 if (PetscRealPart(vals[i]) > 2.) { 278 eidxs[cum++] = i; 279 } 280 } 281 } 282 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 283 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 284 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 285 ierr = PetscFree(eidxs);CHKERRQ(ierr); 286 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 287 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 288 PetscFunctionReturn(0); 289 } 290 291 /* Compute some l2g maps */ 292 if (nedfieldlocal) { 293 IS is; 294 295 /* need to map from the local Nedelec field to local numbering */ 296 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 297 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 298 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 299 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 300 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 301 if (global) { 302 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 303 el2g = al2g; 304 } else { 305 IS gis; 306 307 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 308 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 309 ierr = ISDestroy(&gis);CHKERRQ(ierr); 310 } 311 ierr = ISDestroy(&is);CHKERRQ(ierr); 312 } else { 313 /* restore default */ 314 pcbddc->nedfield = -1; 315 /* one ref for the destruction of al2g, one for el2g */ 316 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 317 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 318 el2g = al2g; 319 fl2g = NULL; 320 } 321 322 /* Start communication to drop connections for interior edges (for cc analysis only) */ 323 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 324 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 325 if (nedfieldlocal) { 326 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 327 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 328 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 329 } else { 330 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 331 } 332 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 333 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 334 335 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 336 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 337 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 338 if (global) { 339 PetscInt rst; 340 341 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 342 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 343 if (matis->sf_rootdata[i] < 2) { 344 matis->sf_rootdata[cum++] = i + rst; 345 } 346 } 347 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 348 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 349 } else { 350 PetscInt *tbz; 351 352 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 353 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 354 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 355 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 356 for (i=0,cum=0;i<ne;i++) 357 if (matis->sf_leafdata[idxs[i]] == 1) 358 tbz[cum++] = i; 359 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 360 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 361 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 362 ierr = PetscFree(tbz);CHKERRQ(ierr); 363 } 364 } else { /* we need the entire G to infer the nullspace */ 365 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 366 G = pcbddc->discretegradient; 367 } 368 369 /* Extract subdomain relevant rows of G */ 370 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 371 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 372 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 373 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 374 ierr = ISDestroy(&lned);CHKERRQ(ierr); 375 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 376 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 377 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 378 379 /* SF for nodal dofs communications */ 380 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 381 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 382 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 384 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 386 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 387 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 388 i = singular ? 2 : 1; 389 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 390 391 /* Destroy temporary G created in MATIS format and modified G */ 392 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 393 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 394 ierr = MatDestroy(&G);CHKERRQ(ierr); 395 396 if (print) { 397 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 398 ierr = MatView(lG,NULL);CHKERRQ(ierr); 399 } 400 401 /* Save lG for values insertion in change of basis */ 402 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 403 404 /* Analyze the edge-nodes connections (duplicate lG) */ 405 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 406 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 407 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 409 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 410 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 411 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 412 /* need to import the boundary specification to ensure the 413 proper detection of coarse edges' endpoints */ 414 if (pcbddc->DirichletBoundariesLocal) { 415 IS is; 416 417 if (fl2g) { 418 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 419 } else { 420 is = pcbddc->DirichletBoundariesLocal; 421 } 422 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 423 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 424 for (i=0;i<cum;i++) { 425 if (idxs[i] >= 0) { 426 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 427 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 428 } 429 } 430 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 431 if (fl2g) { 432 ierr = ISDestroy(&is);CHKERRQ(ierr); 433 } 434 } 435 if (pcbddc->NeumannBoundariesLocal) { 436 IS is; 437 438 if (fl2g) { 439 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 440 } else { 441 is = pcbddc->NeumannBoundariesLocal; 442 } 443 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 444 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 445 for (i=0;i<cum;i++) { 446 if (idxs[i] >= 0) { 447 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 448 } 449 } 450 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 451 if (fl2g) { 452 ierr = ISDestroy(&is);CHKERRQ(ierr); 453 } 454 } 455 456 /* Count neighs per dof */ 457 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 458 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 459 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 460 for (i=1,cum=0;i<n_neigh;i++) { 461 cum += n_shared[i]; 462 for (j=0;j<n_shared[i];j++) { 463 ecount[shared[i][j]]++; 464 } 465 } 466 if (ne) { 467 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 468 } 469 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 470 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 471 for (i=1;i<n_neigh;i++) { 472 for (j=0;j<n_shared[i];j++) { 473 PetscInt k = shared[i][j]; 474 eneighs[k][ecount[k]] = neigh[i]; 475 ecount[k]++; 476 } 477 } 478 for (i=0;i<ne;i++) { 479 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 480 } 481 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 482 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 483 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 484 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 485 for (i=1,cum=0;i<n_neigh;i++) { 486 cum += n_shared[i]; 487 for (j=0;j<n_shared[i];j++) { 488 vcount[shared[i][j]]++; 489 } 490 } 491 if (nv) { 492 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 493 } 494 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 495 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 496 for (i=1;i<n_neigh;i++) { 497 for (j=0;j<n_shared[i];j++) { 498 PetscInt k = shared[i][j]; 499 vneighs[k][vcount[k]] = neigh[i]; 500 vcount[k]++; 501 } 502 } 503 for (i=0;i<nv;i++) { 504 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 505 } 506 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 507 508 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 509 for proper detection of coarse edges' endpoints */ 510 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 511 for (i=0;i<ne;i++) { 512 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 513 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 514 } 515 } 516 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 517 if (!conforming) { 518 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 519 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 520 } 521 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 522 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 523 cum = 0; 524 for (i=0;i<ne;i++) { 525 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 526 if (!PetscBTLookup(btee,i)) { 527 marks[cum++] = i; 528 continue; 529 } 530 /* set badly connected edge dofs as primal */ 531 if (!conforming) { 532 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 533 marks[cum++] = i; 534 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 535 for (j=ii[i];j<ii[i+1];j++) { 536 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 537 } 538 } else { 539 /* every edge dofs should be connected trough a certain number of nodal dofs 540 to other edge dofs belonging to coarse edges 541 - at most 2 endpoints 542 - order-1 interior nodal dofs 543 - no undefined nodal dofs (nconn < order) 544 */ 545 PetscInt ends = 0,ints = 0, undef = 0; 546 for (j=ii[i];j<ii[i+1];j++) { 547 PetscInt v = jj[j],k; 548 PetscInt nconn = iit[v+1]-iit[v]; 549 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 550 if (nconn > order) ends++; 551 else if (nconn == order) ints++; 552 else undef++; 553 } 554 if (undef || ends > 2 || ints != order -1) { 555 marks[cum++] = i; 556 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 557 for (j=ii[i];j<ii[i+1];j++) { 558 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 559 } 560 } 561 } 562 } 563 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 564 if (!order && ii[i+1] != ii[i]) { 565 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 566 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 567 } 568 } 569 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 570 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 571 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 572 if (!conforming) { 573 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 574 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 575 } 576 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 577 578 /* identify splitpoints and corner candidates */ 579 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 580 if (print) { 581 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 582 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 583 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 584 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 585 } 586 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 587 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 588 for (i=0;i<nv;i++) { 589 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 590 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 591 if (!order) { /* variable order */ 592 PetscReal vorder = 0.; 593 594 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 595 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 596 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 597 ord = 1; 598 } 599 #if defined(PETSC_USE_DEBUG) 600 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 601 #endif 602 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 603 if (PetscBTLookup(btbd,jj[j])) { 604 bdir = PETSC_TRUE; 605 break; 606 } 607 if (vc != ecount[jj[j]]) { 608 sneighs = PETSC_FALSE; 609 } else { 610 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 611 for (k=0;k<vc;k++) { 612 if (vn[k] != en[k]) { 613 sneighs = PETSC_FALSE; 614 break; 615 } 616 } 617 } 618 } 619 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 620 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 621 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 622 } else if (test == ord) { 623 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 624 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 625 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 626 } else { 627 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 628 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 629 } 630 } 631 } 632 ierr = PetscFree(ecount);CHKERRQ(ierr); 633 ierr = PetscFree(vcount);CHKERRQ(ierr); 634 if (ne) { 635 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 636 } 637 if (nv) { 638 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 639 } 640 ierr = PetscFree(eneighs);CHKERRQ(ierr); 641 ierr = PetscFree(vneighs);CHKERRQ(ierr); 642 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 643 644 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 645 if (order != 1) { 646 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 647 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 648 for (i=0;i<nv;i++) { 649 if (PetscBTLookup(btvcand,i)) { 650 PetscBool found = PETSC_FALSE; 651 for (j=ii[i];j<ii[i+1] && !found;j++) { 652 PetscInt k,e = jj[j]; 653 if (PetscBTLookup(bte,e)) continue; 654 for (k=iit[e];k<iit[e+1];k++) { 655 PetscInt v = jjt[k]; 656 if (v != i && PetscBTLookup(btvcand,v)) { 657 found = PETSC_TRUE; 658 break; 659 } 660 } 661 } 662 if (!found) { 663 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 664 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 665 } else { 666 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 667 } 668 } 669 } 670 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 671 } 672 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 673 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 674 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 675 676 /* Get the local G^T explicitly */ 677 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 678 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 679 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 680 681 /* Mark interior nodal dofs */ 682 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 683 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 684 for (i=1;i<n_neigh;i++) { 685 for (j=0;j<n_shared[i];j++) { 686 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 687 } 688 } 689 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 690 691 /* communicate corners and splitpoints */ 692 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 693 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 694 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 695 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 696 697 if (print) { 698 IS tbz; 699 700 cum = 0; 701 for (i=0;i<nv;i++) 702 if (sfvleaves[i]) 703 vmarks[cum++] = i; 704 705 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 706 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 707 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 708 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 709 } 710 711 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 712 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 713 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 714 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 715 716 /* Zero rows of lGt corresponding to identified corners 717 and interior nodal dofs */ 718 cum = 0; 719 for (i=0;i<nv;i++) { 720 if (sfvleaves[i]) { 721 vmarks[cum++] = i; 722 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 723 } 724 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 725 } 726 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 727 if (print) { 728 IS tbz; 729 730 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 731 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 732 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 733 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 734 } 735 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 736 ierr = PetscFree(vmarks);CHKERRQ(ierr); 737 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 738 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 739 740 /* Recompute G */ 741 ierr = MatDestroy(&lG);CHKERRQ(ierr); 742 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 743 if (print) { 744 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 745 ierr = MatView(lG,NULL);CHKERRQ(ierr); 746 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 747 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 748 } 749 750 /* Get primal dofs (if any) */ 751 cum = 0; 752 for (i=0;i<ne;i++) { 753 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 754 } 755 if (fl2g) { 756 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 757 } 758 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 759 if (print) { 760 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 761 ierr = ISView(primals,NULL);CHKERRQ(ierr); 762 } 763 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 764 /* TODO: what if the user passed in some of them ? */ 765 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 766 ierr = ISDestroy(&primals);CHKERRQ(ierr); 767 768 /* Compute edge connectivity */ 769 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 770 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 771 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 772 if (fl2g) { 773 PetscBT btf; 774 PetscInt *iia,*jja,*iiu,*jju; 775 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 776 777 /* create CSR for all local dofs */ 778 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 779 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 780 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 781 iiu = pcbddc->mat_graph->xadj; 782 jju = pcbddc->mat_graph->adjncy; 783 } else if (pcbddc->use_local_adj) { 784 rest = PETSC_TRUE; 785 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 786 } else { 787 free = PETSC_TRUE; 788 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 789 iiu[0] = 0; 790 for (i=0;i<n;i++) { 791 iiu[i+1] = i+1; 792 jju[i] = -1; 793 } 794 } 795 796 /* import sizes of CSR */ 797 iia[0] = 0; 798 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 799 800 /* overwrite entries corresponding to the Nedelec field */ 801 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 802 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 803 for (i=0;i<ne;i++) { 804 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 805 iia[idxs[i]+1] = ii[i+1]-ii[i]; 806 } 807 808 /* iia in CSR */ 809 for (i=0;i<n;i++) iia[i+1] += iia[i]; 810 811 /* jja in CSR */ 812 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 813 for (i=0;i<n;i++) 814 if (!PetscBTLookup(btf,i)) 815 for (j=0;j<iiu[i+1]-iiu[i];j++) 816 jja[iia[i]+j] = jju[iiu[i]+j]; 817 818 /* map edge dofs connectivity */ 819 if (jj) { 820 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 821 for (i=0;i<ne;i++) { 822 PetscInt e = idxs[i]; 823 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 824 } 825 } 826 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 827 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 828 if (rest) { 829 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 830 } 831 if (free) { 832 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 833 } 834 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 835 } else { 836 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 837 } 838 839 /* Analyze interface for edge dofs */ 840 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 841 pcbddc->mat_graph->twodim = PETSC_FALSE; 842 843 /* Get coarse edges in the edge space */ 844 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 845 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 846 847 if (fl2g) { 848 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 849 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 850 for (i=0;i<nee;i++) { 851 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 852 } 853 } else { 854 eedges = alleedges; 855 primals = allprimals; 856 } 857 858 /* Mark fine edge dofs with their coarse edge id */ 859 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 860 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 861 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 862 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 863 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 864 if (print) { 865 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 866 ierr = ISView(primals,NULL);CHKERRQ(ierr); 867 } 868 869 maxsize = 0; 870 for (i=0;i<nee;i++) { 871 PetscInt size,mark = i+1; 872 873 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 874 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 875 for (j=0;j<size;j++) marks[idxs[j]] = mark; 876 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 877 maxsize = PetscMax(maxsize,size); 878 } 879 880 /* Find coarse edge endpoints */ 881 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 882 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 883 for (i=0;i<nee;i++) { 884 PetscInt mark = i+1,size; 885 886 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 887 if (!size && nedfieldlocal) continue; 888 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 889 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 890 if (print) { 891 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 892 ISView(eedges[i],NULL); 893 } 894 for (j=0;j<size;j++) { 895 PetscInt k, ee = idxs[j]; 896 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 897 for (k=ii[ee];k<ii[ee+1];k++) { 898 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 899 if (PetscBTLookup(btv,jj[k])) { 900 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 901 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 902 PetscInt k2; 903 PetscBool corner = PETSC_FALSE; 904 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 905 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 906 /* it's a corner if either is connected with an edge dof belonging to a different cc or 907 if the edge dof lie on the natural part of the boundary */ 908 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 909 corner = PETSC_TRUE; 910 break; 911 } 912 } 913 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 914 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 915 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 916 } else { 917 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 918 } 919 } 920 } 921 } 922 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 923 } 924 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 925 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 926 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 927 928 /* Reset marked primal dofs */ 929 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 930 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 931 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 932 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 933 934 /* Now use the initial lG */ 935 ierr = MatDestroy(&lG);CHKERRQ(ierr); 936 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 937 lG = lGinit; 938 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 939 940 /* Compute extended cols indices */ 941 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 942 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 943 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 944 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 945 i *= maxsize; 946 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 947 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 948 eerr = PETSC_FALSE; 949 for (i=0;i<nee;i++) { 950 PetscInt size,found = 0; 951 952 cum = 0; 953 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 954 if (!size && nedfieldlocal) continue; 955 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 956 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 957 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 958 for (j=0;j<size;j++) { 959 PetscInt k,ee = idxs[j]; 960 for (k=ii[ee];k<ii[ee+1];k++) { 961 PetscInt vv = jj[k]; 962 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 963 else if (!PetscBTLookupSet(btvc,vv)) found++; 964 } 965 } 966 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 967 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 968 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 969 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 970 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 971 /* it may happen that endpoints are not defined at this point 972 if it is the case, mark this edge for a second pass */ 973 if (cum != size -1 || found != 2) { 974 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 975 if (print) { 976 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 977 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 978 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 979 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 980 } 981 eerr = PETSC_TRUE; 982 } 983 } 984 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 985 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 986 if (done) { 987 PetscInt *newprimals; 988 989 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 990 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 991 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 992 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 993 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 994 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 995 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 996 for (i=0;i<nee;i++) { 997 PetscBool has_candidates = PETSC_FALSE; 998 if (PetscBTLookup(bter,i)) { 999 PetscInt size,mark = i+1; 1000 1001 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1002 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1003 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1004 for (j=0;j<size;j++) { 1005 PetscInt k,ee = idxs[j]; 1006 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1007 for (k=ii[ee];k<ii[ee+1];k++) { 1008 /* set all candidates located on the edge as corners */ 1009 if (PetscBTLookup(btvcand,jj[k])) { 1010 PetscInt k2,vv = jj[k]; 1011 has_candidates = PETSC_TRUE; 1012 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1013 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1014 /* set all edge dofs connected to candidate as primals */ 1015 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1016 if (marks[jjt[k2]] == mark) { 1017 PetscInt k3,ee2 = jjt[k2]; 1018 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1019 newprimals[cum++] = ee2; 1020 /* finally set the new corners */ 1021 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1022 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1023 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1024 } 1025 } 1026 } 1027 } else { 1028 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1029 } 1030 } 1031 } 1032 if (!has_candidates) { /* circular edge */ 1033 PetscInt k, ee = idxs[0],*tmarks; 1034 1035 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1036 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1037 for (k=ii[ee];k<ii[ee+1];k++) { 1038 PetscInt k2; 1039 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1040 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1041 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1042 } 1043 for (j=0;j<size;j++) { 1044 if (tmarks[idxs[j]] > 1) { 1045 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1046 newprimals[cum++] = idxs[j]; 1047 } 1048 } 1049 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1050 } 1051 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1052 } 1053 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1054 } 1055 ierr = PetscFree(extcols);CHKERRQ(ierr); 1056 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1057 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1058 if (fl2g) { 1059 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1060 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1061 for (i=0;i<nee;i++) { 1062 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1063 } 1064 ierr = PetscFree(eedges);CHKERRQ(ierr); 1065 } 1066 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1067 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1068 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1069 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1070 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1071 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1072 pcbddc->mat_graph->twodim = PETSC_FALSE; 1073 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1074 if (fl2g) { 1075 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1076 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1077 for (i=0;i<nee;i++) { 1078 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1079 } 1080 } else { 1081 eedges = alleedges; 1082 primals = allprimals; 1083 } 1084 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1085 1086 /* Mark again */ 1087 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1088 for (i=0;i<nee;i++) { 1089 PetscInt size,mark = i+1; 1090 1091 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1092 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1093 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1094 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1095 } 1096 if (print) { 1097 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1098 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1099 } 1100 1101 /* Recompute extended cols */ 1102 eerr = PETSC_FALSE; 1103 for (i=0;i<nee;i++) { 1104 PetscInt size; 1105 1106 cum = 0; 1107 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1108 if (!size && nedfieldlocal) continue; 1109 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1110 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1111 for (j=0;j<size;j++) { 1112 PetscInt k,ee = idxs[j]; 1113 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1114 } 1115 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1116 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1117 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1118 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1119 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1120 if (cum != size -1) { 1121 if (print) { 1122 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1123 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1125 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1126 } 1127 eerr = PETSC_TRUE; 1128 } 1129 } 1130 } 1131 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1132 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1133 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1134 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1135 /* an error should not occur at this point */ 1136 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1137 1138 /* Check the number of endpoints */ 1139 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1140 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1141 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1142 for (i=0;i<nee;i++) { 1143 PetscInt size, found = 0, gc[2]; 1144 1145 /* init with defaults */ 1146 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1147 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1148 if (!size && nedfieldlocal) continue; 1149 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1150 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1151 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1152 for (j=0;j<size;j++) { 1153 PetscInt k,ee = idxs[j]; 1154 for (k=ii[ee];k<ii[ee+1];k++) { 1155 PetscInt vv = jj[k]; 1156 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1157 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1158 corners[i*2+found++] = vv; 1159 } 1160 } 1161 } 1162 if (found != 2) { 1163 PetscInt e; 1164 if (fl2g) { 1165 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1166 } else { 1167 e = idxs[0]; 1168 } 1169 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1170 } 1171 1172 /* get primal dof index on this coarse edge */ 1173 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1174 if (gc[0] > gc[1]) { 1175 PetscInt swap = corners[2*i]; 1176 corners[2*i] = corners[2*i+1]; 1177 corners[2*i+1] = swap; 1178 } 1179 cedges[i] = idxs[size-1]; 1180 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1181 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1182 } 1183 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1184 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1185 1186 #if defined(PETSC_USE_DEBUG) 1187 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1188 not interfere with neighbouring coarse edges */ 1189 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1190 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1191 for (i=0;i<nv;i++) { 1192 PetscInt emax = 0,eemax = 0; 1193 1194 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1195 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1196 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1197 for (j=1;j<nee+1;j++) { 1198 if (emax < emarks[j]) { 1199 emax = emarks[j]; 1200 eemax = j; 1201 } 1202 } 1203 /* not relevant for edges */ 1204 if (!eemax) continue; 1205 1206 for (j=ii[i];j<ii[i+1];j++) { 1207 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1208 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1209 } 1210 } 1211 } 1212 ierr = PetscFree(emarks);CHKERRQ(ierr); 1213 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1214 #endif 1215 1216 /* Compute extended rows indices for edge blocks of the change of basis */ 1217 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1218 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1219 extmem *= maxsize; 1220 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1221 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1222 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1223 for (i=0;i<nv;i++) { 1224 PetscInt mark = 0,size,start; 1225 1226 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1227 for (j=ii[i];j<ii[i+1];j++) 1228 if (marks[jj[j]] && !mark) 1229 mark = marks[jj[j]]; 1230 1231 /* not relevant */ 1232 if (!mark) continue; 1233 1234 /* import extended row */ 1235 mark--; 1236 start = mark*extmem+extrowcum[mark]; 1237 size = ii[i+1]-ii[i]; 1238 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1239 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1240 extrowcum[mark] += size; 1241 } 1242 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1243 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1244 ierr = PetscFree(marks);CHKERRQ(ierr); 1245 1246 /* Compress extrows */ 1247 cum = 0; 1248 for (i=0;i<nee;i++) { 1249 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1250 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1251 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1252 cum = PetscMax(cum,size); 1253 } 1254 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1255 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1256 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1257 1258 /* Workspace for lapack inner calls and VecSetValues */ 1259 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1260 1261 /* Create change of basis matrix (preallocation can be improved) */ 1262 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1263 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1264 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1265 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1266 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1267 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1268 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1269 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1270 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1271 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1272 1273 /* Defaults to identity */ 1274 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1275 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1276 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1277 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1278 1279 /* Create discrete gradient for the coarser level if needed */ 1280 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1281 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1282 if (pcbddc->current_level < pcbddc->max_levels) { 1283 ISLocalToGlobalMapping cel2g,cvl2g; 1284 IS wis,gwis; 1285 PetscInt cnv,cne; 1286 1287 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1288 if (fl2g) { 1289 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1290 } else { 1291 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1292 pcbddc->nedclocal = wis; 1293 } 1294 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1295 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1296 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1297 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1298 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1299 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1300 1301 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1302 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1303 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1304 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1305 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1306 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1307 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1308 1309 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1310 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1311 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1312 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1313 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1314 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1315 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1316 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1317 } 1318 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1319 1320 #if defined(PRINT_GDET) 1321 inc = 0; 1322 lev = pcbddc->current_level; 1323 #endif 1324 1325 /* Insert values in the change of basis matrix */ 1326 for (i=0;i<nee;i++) { 1327 Mat Gins = NULL, GKins = NULL; 1328 IS cornersis = NULL; 1329 PetscScalar cvals[2]; 1330 1331 if (pcbddc->nedcG) { 1332 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1333 } 1334 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1335 if (Gins && GKins) { 1336 PetscScalar *data; 1337 const PetscInt *rows,*cols; 1338 PetscInt nrh,nch,nrc,ncc; 1339 1340 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1341 /* H1 */ 1342 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1343 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1344 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1345 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1346 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1347 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1348 /* complement */ 1349 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1350 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1351 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1352 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1353 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1354 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1355 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1356 1357 /* coarse discrete gradient */ 1358 if (pcbddc->nedcG) { 1359 PetscInt cols[2]; 1360 1361 cols[0] = 2*i; 1362 cols[1] = 2*i+1; 1363 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1364 } 1365 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1366 } 1367 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1368 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1369 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1370 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1371 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1372 } 1373 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1374 1375 /* Start assembling */ 1376 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1377 if (pcbddc->nedcG) { 1378 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1379 } 1380 1381 /* Free */ 1382 if (fl2g) { 1383 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1384 for (i=0;i<nee;i++) { 1385 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(eedges);CHKERRQ(ierr); 1388 } 1389 1390 /* hack mat_graph with primal dofs on the coarse edges */ 1391 { 1392 PCBDDCGraph graph = pcbddc->mat_graph; 1393 PetscInt *oqueue = graph->queue; 1394 PetscInt *ocptr = graph->cptr; 1395 PetscInt ncc,*idxs; 1396 1397 /* find first primal edge */ 1398 if (pcbddc->nedclocal) { 1399 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1400 } else { 1401 if (fl2g) { 1402 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1403 } 1404 idxs = cedges; 1405 } 1406 cum = 0; 1407 while (cum < nee && cedges[cum] < 0) cum++; 1408 1409 /* adapt connected components */ 1410 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1411 graph->cptr[0] = 0; 1412 for (i=0,ncc=0;i<graph->ncc;i++) { 1413 PetscInt lc = ocptr[i+1]-ocptr[i]; 1414 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1415 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1416 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1417 ncc++; 1418 lc--; 1419 cum++; 1420 while (cum < nee && cedges[cum] < 0) cum++; 1421 } 1422 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1423 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1424 ncc++; 1425 } 1426 graph->ncc = ncc; 1427 if (pcbddc->nedclocal) { 1428 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1429 } 1430 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1431 } 1432 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1433 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1434 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1435 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1436 1437 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1438 ierr = PetscFree(extrow);CHKERRQ(ierr); 1439 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1440 ierr = PetscFree(corners);CHKERRQ(ierr); 1441 ierr = PetscFree(cedges);CHKERRQ(ierr); 1442 ierr = PetscFree(extrows);CHKERRQ(ierr); 1443 ierr = PetscFree(extcols);CHKERRQ(ierr); 1444 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1445 1446 /* Complete assembling */ 1447 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1448 if (pcbddc->nedcG) { 1449 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1450 #if 0 1451 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1452 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1453 #endif 1454 } 1455 1456 /* set change of basis */ 1457 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1458 ierr = MatDestroy(&T);CHKERRQ(ierr); 1459 1460 PetscFunctionReturn(0); 1461 } 1462 1463 /* the near-null space of BDDC carries information on quadrature weights, 1464 and these can be collinear -> so cheat with MatNullSpaceCreate 1465 and create a suitable set of basis vectors first */ 1466 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1467 { 1468 PetscErrorCode ierr; 1469 PetscInt i; 1470 1471 PetscFunctionBegin; 1472 for (i=0;i<nvecs;i++) { 1473 PetscInt first,last; 1474 1475 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1476 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1477 if (i>=first && i < last) { 1478 PetscScalar *data; 1479 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1480 if (!has_const) { 1481 data[i-first] = 1.; 1482 } else { 1483 data[2*i-first] = 1./PetscSqrtReal(2.); 1484 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1485 } 1486 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1487 } 1488 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1489 } 1490 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1491 for (i=0;i<nvecs;i++) { /* reset vectors */ 1492 PetscInt first,last; 1493 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1494 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1495 if (i>=first && i < last) { 1496 PetscScalar *data; 1497 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1498 if (!has_const) { 1499 data[i-first] = 0.; 1500 } else { 1501 data[2*i-first] = 0.; 1502 data[2*i-first+1] = 0.; 1503 } 1504 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1505 } 1506 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1507 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1508 } 1509 PetscFunctionReturn(0); 1510 } 1511 1512 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1513 { 1514 Mat loc_divudotp; 1515 Vec p,v,vins,quad_vec,*quad_vecs; 1516 ISLocalToGlobalMapping map; 1517 PetscScalar *vals; 1518 const PetscScalar *array; 1519 PetscInt i,maxneighs,maxsize; 1520 PetscInt n_neigh,*neigh,*n_shared,**shared; 1521 PetscMPIInt rank; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1526 ierr = MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1527 if (!maxneighs) { 1528 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1529 *nnsp = NULL; 1530 PetscFunctionReturn(0); 1531 } 1532 maxsize = 0; 1533 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1534 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1535 /* create vectors to hold quadrature weights */ 1536 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1537 if (!transpose) { 1538 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1539 } else { 1540 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1541 } 1542 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1543 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1544 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1545 for (i=0;i<maxneighs;i++) { 1546 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1547 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1548 } 1549 1550 /* compute local quad vec */ 1551 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1552 if (!transpose) { 1553 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1554 } else { 1555 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1556 } 1557 ierr = VecSet(p,1.);CHKERRQ(ierr); 1558 if (!transpose) { 1559 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1560 } else { 1561 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1562 } 1563 if (vl2l) { 1564 Mat lA; 1565 VecScatter sc; 1566 1567 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1568 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1569 ierr = VecScatterCreate(v,vl2l,vins,NULL,&sc);CHKERRQ(ierr); 1570 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1571 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1572 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1573 } else { 1574 vins = v; 1575 } 1576 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1577 ierr = VecDestroy(&p);CHKERRQ(ierr); 1578 1579 /* insert in global quadrature vecs */ 1580 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1581 for (i=0;i<n_neigh;i++) { 1582 const PetscInt *idxs; 1583 PetscInt idx,nn,j; 1584 1585 idxs = shared[i]; 1586 nn = n_shared[i]; 1587 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1588 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1589 idx = -(idx+1); 1590 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1591 } 1592 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1593 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1594 if (vl2l) { 1595 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1596 } 1597 ierr = VecDestroy(&v);CHKERRQ(ierr); 1598 ierr = PetscFree(vals);CHKERRQ(ierr); 1599 1600 /* assemble near null space */ 1601 for (i=0;i<maxneighs;i++) { 1602 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1603 } 1604 for (i=0;i<maxneighs;i++) { 1605 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1606 ierr = VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");CHKERRQ(ierr); 1607 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1608 } 1609 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1610 PetscFunctionReturn(0); 1611 } 1612 1613 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1614 { 1615 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1616 PetscErrorCode ierr; 1617 1618 PetscFunctionBegin; 1619 if (primalv) { 1620 if (pcbddc->user_primal_vertices_local) { 1621 IS list[2], newp; 1622 1623 list[0] = primalv; 1624 list[1] = pcbddc->user_primal_vertices_local; 1625 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1626 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1627 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1628 pcbddc->user_primal_vertices_local = newp; 1629 } else { 1630 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1631 } 1632 } 1633 PetscFunctionReturn(0); 1634 } 1635 1636 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1637 { 1638 PetscInt f, *comp = (PetscInt *)ctx; 1639 1640 PetscFunctionBegin; 1641 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1642 PetscFunctionReturn(0); 1643 } 1644 1645 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1646 { 1647 PetscErrorCode ierr; 1648 Vec local,global; 1649 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1650 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1651 PetscBool monolithic = PETSC_FALSE; 1652 1653 PetscFunctionBegin; 1654 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1655 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1656 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1657 /* need to convert from global to local topology information and remove references to information in global ordering */ 1658 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1659 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1660 if (monolithic) { /* just get block size to properly compute vertices */ 1661 if (pcbddc->vertex_size == 1) { 1662 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1663 } 1664 goto boundary; 1665 } 1666 1667 if (pcbddc->user_provided_isfordofs) { 1668 if (pcbddc->n_ISForDofs) { 1669 PetscInt i; 1670 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1671 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1672 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1673 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1674 } 1675 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1676 pcbddc->n_ISForDofs = 0; 1677 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1678 } 1679 } else { 1680 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1681 DM dm; 1682 1683 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1684 if (!dm) { 1685 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1686 } 1687 if (dm) { 1688 IS *fields; 1689 PetscInt nf,i; 1690 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1691 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1692 for (i=0;i<nf;i++) { 1693 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1694 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1695 } 1696 ierr = PetscFree(fields);CHKERRQ(ierr); 1697 pcbddc->n_ISForDofsLocal = nf; 1698 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1699 PetscContainer c; 1700 1701 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1702 if (c) { 1703 MatISLocalFields lf; 1704 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1705 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1706 } else { /* fallback, create the default fields if bs > 1 */ 1707 PetscInt i, n = matis->A->rmap->n; 1708 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1709 if (i > 1) { 1710 pcbddc->n_ISForDofsLocal = i; 1711 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1712 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1713 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1714 } 1715 } 1716 } 1717 } 1718 } else { 1719 PetscInt i; 1720 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1721 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1722 } 1723 } 1724 } 1725 1726 boundary: 1727 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1728 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1729 } else if (pcbddc->DirichletBoundariesLocal) { 1730 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1731 } 1732 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1733 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1734 } else if (pcbddc->NeumannBoundariesLocal) { 1735 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1736 } 1737 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1738 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1739 } 1740 ierr = VecDestroy(&global);CHKERRQ(ierr); 1741 ierr = VecDestroy(&local);CHKERRQ(ierr); 1742 /* detect local disconnected subdomains if requested (use matis->A) */ 1743 if (pcbddc->detect_disconnected) { 1744 IS primalv = NULL; 1745 PetscInt i; 1746 1747 for (i=0;i<pcbddc->n_local_subs;i++) { 1748 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1749 } 1750 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1751 ierr = PCBDDCDetectDisconnectedComponents(pc,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1752 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1753 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1754 } 1755 /* early stage corner detection */ 1756 { 1757 DM dm; 1758 1759 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1760 if (dm) { 1761 PetscBool isda; 1762 1763 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1764 if (isda) { 1765 ISLocalToGlobalMapping l2l; 1766 IS corners; 1767 Mat lA; 1768 1769 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1770 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1771 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1772 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1773 if (l2l) { 1774 const PetscInt *idx; 1775 PetscInt bs,*idxout,n; 1776 1777 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1778 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1779 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1780 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1781 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1782 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1783 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1784 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1785 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1786 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1787 pcbddc->corner_selected = PETSC_TRUE; 1788 } else { /* not from DMDA */ 1789 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1790 } 1791 } 1792 } 1793 } 1794 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1795 DM dm; 1796 1797 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1798 if (!dm) { 1799 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1800 } 1801 if (dm) { 1802 Vec vcoords; 1803 PetscSection section; 1804 PetscReal *coords; 1805 PetscInt d,cdim,nl,nf,**ctxs; 1806 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1807 1808 ierr = DMGetCoordinateDim(dm,&cdim);CHKERRQ(ierr); 1809 ierr = DMGetDefaultSection(dm,§ion);CHKERRQ(ierr); 1810 ierr = PetscSectionGetNumFields(section,&nf);CHKERRQ(ierr); 1811 ierr = DMCreateGlobalVector(dm,&vcoords);CHKERRQ(ierr); 1812 ierr = VecGetLocalSize(vcoords,&nl);CHKERRQ(ierr); 1813 ierr = PetscMalloc1(nl*cdim,&coords);CHKERRQ(ierr); 1814 ierr = PetscMalloc2(nf,&funcs,nf,&ctxs);CHKERRQ(ierr); 1815 ierr = PetscMalloc1(nf,&ctxs[0]);CHKERRQ(ierr); 1816 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1817 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1818 for (d=0;d<cdim;d++) { 1819 PetscInt i; 1820 const PetscScalar *v; 1821 1822 for (i=0;i<nf;i++) ctxs[i][0] = d; 1823 ierr = DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);CHKERRQ(ierr); 1824 ierr = VecGetArrayRead(vcoords,&v);CHKERRQ(ierr); 1825 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1826 ierr = VecRestoreArrayRead(vcoords,&v);CHKERRQ(ierr); 1827 } 1828 ierr = VecDestroy(&vcoords);CHKERRQ(ierr); 1829 ierr = PCSetCoordinates(pc,cdim,nl,coords);CHKERRQ(ierr); 1830 ierr = PetscFree(coords);CHKERRQ(ierr); 1831 ierr = PetscFree(ctxs[0]);CHKERRQ(ierr); 1832 ierr = PetscFree2(funcs,ctxs);CHKERRQ(ierr); 1833 } 1834 } 1835 PetscFunctionReturn(0); 1836 } 1837 1838 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1839 { 1840 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1841 PetscErrorCode ierr; 1842 IS nis; 1843 const PetscInt *idxs; 1844 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1845 PetscBool *ld; 1846 1847 PetscFunctionBegin; 1848 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1849 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1850 if (mop == MPI_LAND) { 1851 /* init rootdata with true */ 1852 ld = (PetscBool*) matis->sf_rootdata; 1853 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1854 } else { 1855 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1856 } 1857 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1858 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1859 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1860 ld = (PetscBool*) matis->sf_leafdata; 1861 for (i=0;i<nd;i++) 1862 if (-1 < idxs[i] && idxs[i] < n) 1863 ld[idxs[i]] = PETSC_TRUE; 1864 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1865 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1866 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1867 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1868 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1869 if (mop == MPI_LAND) { 1870 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1871 } else { 1872 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1873 } 1874 for (i=0,nnd=0;i<n;i++) 1875 if (ld[i]) 1876 nidxs[nnd++] = i; 1877 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1878 ierr = ISDestroy(is);CHKERRQ(ierr); 1879 *is = nis; 1880 PetscFunctionReturn(0); 1881 } 1882 1883 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1884 { 1885 PC_IS *pcis = (PC_IS*)(pc->data); 1886 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1887 PetscErrorCode ierr; 1888 1889 PetscFunctionBegin; 1890 if (!pcbddc->benign_have_null) { 1891 PetscFunctionReturn(0); 1892 } 1893 if (pcbddc->ChangeOfBasisMatrix) { 1894 Vec swap; 1895 1896 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1897 swap = pcbddc->work_change; 1898 pcbddc->work_change = r; 1899 r = swap; 1900 } 1901 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1902 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1903 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1904 ierr = VecSet(z,0.);CHKERRQ(ierr); 1905 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1906 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1907 if (pcbddc->ChangeOfBasisMatrix) { 1908 pcbddc->work_change = r; 1909 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1910 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1911 } 1912 PetscFunctionReturn(0); 1913 } 1914 1915 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1916 { 1917 PCBDDCBenignMatMult_ctx ctx; 1918 PetscErrorCode ierr; 1919 PetscBool apply_right,apply_left,reset_x; 1920 1921 PetscFunctionBegin; 1922 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1923 if (transpose) { 1924 apply_right = ctx->apply_left; 1925 apply_left = ctx->apply_right; 1926 } else { 1927 apply_right = ctx->apply_right; 1928 apply_left = ctx->apply_left; 1929 } 1930 reset_x = PETSC_FALSE; 1931 if (apply_right) { 1932 const PetscScalar *ax; 1933 PetscInt nl,i; 1934 1935 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1936 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1937 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1938 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1939 for (i=0;i<ctx->benign_n;i++) { 1940 PetscScalar sum,val; 1941 const PetscInt *idxs; 1942 PetscInt nz,j; 1943 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1944 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1945 sum = 0.; 1946 if (ctx->apply_p0) { 1947 val = ctx->work[idxs[nz-1]]; 1948 for (j=0;j<nz-1;j++) { 1949 sum += ctx->work[idxs[j]]; 1950 ctx->work[idxs[j]] += val; 1951 } 1952 } else { 1953 for (j=0;j<nz-1;j++) { 1954 sum += ctx->work[idxs[j]]; 1955 } 1956 } 1957 ctx->work[idxs[nz-1]] -= sum; 1958 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1959 } 1960 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1961 reset_x = PETSC_TRUE; 1962 } 1963 if (transpose) { 1964 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1965 } else { 1966 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1967 } 1968 if (reset_x) { 1969 ierr = VecResetArray(x);CHKERRQ(ierr); 1970 } 1971 if (apply_left) { 1972 PetscScalar *ay; 1973 PetscInt i; 1974 1975 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1976 for (i=0;i<ctx->benign_n;i++) { 1977 PetscScalar sum,val; 1978 const PetscInt *idxs; 1979 PetscInt nz,j; 1980 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1981 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1982 val = -ay[idxs[nz-1]]; 1983 if (ctx->apply_p0) { 1984 sum = 0.; 1985 for (j=0;j<nz-1;j++) { 1986 sum += ay[idxs[j]]; 1987 ay[idxs[j]] += val; 1988 } 1989 ay[idxs[nz-1]] += sum; 1990 } else { 1991 for (j=0;j<nz-1;j++) { 1992 ay[idxs[j]] += val; 1993 } 1994 ay[idxs[nz-1]] = 0.; 1995 } 1996 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1997 } 1998 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1999 } 2000 PetscFunctionReturn(0); 2001 } 2002 2003 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2004 { 2005 PetscErrorCode ierr; 2006 2007 PetscFunctionBegin; 2008 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 2009 PetscFunctionReturn(0); 2010 } 2011 2012 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2013 { 2014 PetscErrorCode ierr; 2015 2016 PetscFunctionBegin; 2017 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2022 { 2023 PC_IS *pcis = (PC_IS*)pc->data; 2024 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2025 PCBDDCBenignMatMult_ctx ctx; 2026 PetscErrorCode ierr; 2027 2028 PetscFunctionBegin; 2029 if (!restore) { 2030 Mat A_IB,A_BI; 2031 PetscScalar *work; 2032 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2033 2034 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2035 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2036 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2037 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2038 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2039 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2040 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2041 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2042 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2043 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2044 ctx->apply_left = PETSC_TRUE; 2045 ctx->apply_right = PETSC_FALSE; 2046 ctx->apply_p0 = PETSC_FALSE; 2047 ctx->benign_n = pcbddc->benign_n; 2048 if (reuse) { 2049 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2050 ctx->free = PETSC_FALSE; 2051 } else { /* TODO: could be optimized for successive solves */ 2052 ISLocalToGlobalMapping N_to_D; 2053 PetscInt i; 2054 2055 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2056 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2057 for (i=0;i<pcbddc->benign_n;i++) { 2058 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2059 } 2060 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2061 ctx->free = PETSC_TRUE; 2062 } 2063 ctx->A = pcis->A_IB; 2064 ctx->work = work; 2065 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2066 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2067 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2068 pcis->A_IB = A_IB; 2069 2070 /* A_BI as A_IB^T */ 2071 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2072 pcbddc->benign_original_mat = pcis->A_BI; 2073 pcis->A_BI = A_BI; 2074 } else { 2075 if (!pcbddc->benign_original_mat) { 2076 PetscFunctionReturn(0); 2077 } 2078 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2079 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2080 pcis->A_IB = ctx->A; 2081 ctx->A = NULL; 2082 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2083 pcis->A_BI = pcbddc->benign_original_mat; 2084 pcbddc->benign_original_mat = NULL; 2085 if (ctx->free) { 2086 PetscInt i; 2087 for (i=0;i<ctx->benign_n;i++) { 2088 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2089 } 2090 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2091 } 2092 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2093 ierr = PetscFree(ctx);CHKERRQ(ierr); 2094 } 2095 PetscFunctionReturn(0); 2096 } 2097 2098 /* used just in bddc debug mode */ 2099 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2100 { 2101 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2102 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2103 Mat An; 2104 PetscErrorCode ierr; 2105 2106 PetscFunctionBegin; 2107 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2108 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2109 if (is1) { 2110 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2111 ierr = MatDestroy(&An);CHKERRQ(ierr); 2112 } else { 2113 *B = An; 2114 } 2115 PetscFunctionReturn(0); 2116 } 2117 2118 /* TODO: add reuse flag */ 2119 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2120 { 2121 Mat Bt; 2122 PetscScalar *a,*bdata; 2123 const PetscInt *ii,*ij; 2124 PetscInt m,n,i,nnz,*bii,*bij; 2125 PetscBool flg_row; 2126 PetscErrorCode ierr; 2127 2128 PetscFunctionBegin; 2129 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2130 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2131 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2132 nnz = n; 2133 for (i=0;i<ii[n];i++) { 2134 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2135 } 2136 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2137 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2138 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2139 nnz = 0; 2140 bii[0] = 0; 2141 for (i=0;i<n;i++) { 2142 PetscInt j; 2143 for (j=ii[i];j<ii[i+1];j++) { 2144 PetscScalar entry = a[j]; 2145 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2146 bij[nnz] = ij[j]; 2147 bdata[nnz] = entry; 2148 nnz++; 2149 } 2150 } 2151 bii[i+1] = nnz; 2152 } 2153 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2154 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2155 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2156 { 2157 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2158 b->free_a = PETSC_TRUE; 2159 b->free_ij = PETSC_TRUE; 2160 } 2161 if (*B == A) { 2162 ierr = MatDestroy(&A);CHKERRQ(ierr); 2163 } 2164 *B = Bt; 2165 PetscFunctionReturn(0); 2166 } 2167 2168 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv) 2169 { 2170 Mat B = NULL; 2171 DM dm; 2172 IS is_dummy,*cc_n; 2173 ISLocalToGlobalMapping l2gmap_dummy; 2174 PCBDDCGraph graph; 2175 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2176 PetscInt i,n; 2177 PetscInt *xadj,*adjncy; 2178 PetscBool isplex = PETSC_FALSE; 2179 PetscErrorCode ierr; 2180 2181 PetscFunctionBegin; 2182 if (ncc) *ncc = 0; 2183 if (cc) *cc = NULL; 2184 if (primalv) *primalv = NULL; 2185 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2186 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2187 if (!dm) { 2188 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2189 } 2190 if (dm) { 2191 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2192 } 2193 if (isplex) { /* this code has been modified from plexpartition.c */ 2194 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2195 PetscInt *adj = NULL; 2196 IS cellNumbering; 2197 const PetscInt *cellNum; 2198 PetscBool useCone, useClosure; 2199 PetscSection section; 2200 PetscSegBuffer adjBuffer; 2201 PetscSF sfPoint; 2202 PetscErrorCode ierr; 2203 2204 PetscFunctionBegin; 2205 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2206 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2207 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2208 /* Build adjacency graph via a section/segbuffer */ 2209 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2210 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2211 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2212 /* Always use FVM adjacency to create partitioner graph */ 2213 ierr = DMPlexGetAdjacencyUseCone(dm, &useCone);CHKERRQ(ierr); 2214 ierr = DMPlexGetAdjacencyUseClosure(dm, &useClosure);CHKERRQ(ierr); 2215 ierr = DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);CHKERRQ(ierr); 2216 ierr = DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);CHKERRQ(ierr); 2217 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2218 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2219 for (n = 0, p = pStart; p < pEnd; p++) { 2220 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2221 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2222 adjSize = PETSC_DETERMINE; 2223 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2224 for (a = 0; a < adjSize; ++a) { 2225 const PetscInt point = adj[a]; 2226 if (pStart <= point && point < pEnd) { 2227 PetscInt *PETSC_RESTRICT pBuf; 2228 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2229 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2230 *pBuf = point; 2231 } 2232 } 2233 n++; 2234 } 2235 ierr = DMPlexSetAdjacencyUseCone(dm, useCone);CHKERRQ(ierr); 2236 ierr = DMPlexSetAdjacencyUseClosure(dm, useClosure);CHKERRQ(ierr); 2237 /* Derive CSR graph from section/segbuffer */ 2238 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2239 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2240 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2241 for (idx = 0, p = pStart; p < pEnd; p++) { 2242 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2243 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2244 } 2245 xadj[n] = size; 2246 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2247 /* Clean up */ 2248 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2249 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2250 ierr = PetscFree(adj);CHKERRQ(ierr); 2251 graph->xadj = xadj; 2252 graph->adjncy = adjncy; 2253 } else { 2254 Mat A; 2255 PetscBool filter = PETSC_FALSE, isseqaij, flg_row; 2256 2257 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2258 if (!A->rmap->N || !A->cmap->N) { 2259 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2260 PetscFunctionReturn(0); 2261 } 2262 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2263 if (!isseqaij && filter) { 2264 PetscBool isseqdense; 2265 2266 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2267 if (!isseqdense) { 2268 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2269 } else { /* TODO: rectangular case and LDA */ 2270 PetscScalar *array; 2271 PetscReal chop=1.e-6; 2272 2273 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2274 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2275 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2276 for (i=0;i<n;i++) { 2277 PetscInt j; 2278 for (j=i+1;j<n;j++) { 2279 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2280 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2281 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2282 } 2283 } 2284 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2285 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2286 } 2287 } else { 2288 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2289 B = A; 2290 } 2291 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2292 2293 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2294 if (filter) { 2295 PetscScalar *data; 2296 PetscInt j,cum; 2297 2298 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2299 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2300 cum = 0; 2301 for (i=0;i<n;i++) { 2302 PetscInt t; 2303 2304 for (j=xadj[i];j<xadj[i+1];j++) { 2305 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2306 continue; 2307 } 2308 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2309 } 2310 t = xadj_filtered[i]; 2311 xadj_filtered[i] = cum; 2312 cum += t; 2313 } 2314 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2315 graph->xadj = xadj_filtered; 2316 graph->adjncy = adjncy_filtered; 2317 } else { 2318 graph->xadj = xadj; 2319 graph->adjncy = adjncy; 2320 } 2321 } 2322 /* compute local connected components using PCBDDCGraph */ 2323 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2324 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2325 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2326 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2327 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2328 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2329 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2330 2331 /* partial clean up */ 2332 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2333 if (B) { 2334 PetscBool flg_row; 2335 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2336 ierr = MatDestroy(&B);CHKERRQ(ierr); 2337 } 2338 if (isplex) { 2339 ierr = PetscFree(xadj);CHKERRQ(ierr); 2340 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2341 } 2342 2343 /* get back data */ 2344 if (isplex) { 2345 if (ncc) *ncc = graph->ncc; 2346 if (cc || primalv) { 2347 Mat A; 2348 PetscBT btv,btvt; 2349 PetscSection subSection; 2350 PetscInt *ids,cum,cump,*cids,*pids; 2351 2352 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2353 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2354 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2355 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2356 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2357 2358 cids[0] = 0; 2359 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2360 PetscInt j; 2361 2362 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2363 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2364 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2365 2366 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2367 for (k = 0; k < 2*size; k += 2) { 2368 PetscInt s, p = closure[k], off, dof, cdof; 2369 2370 ierr = PetscSectionGetConstraintDof(subSection, p, &cdof);CHKERRQ(ierr); 2371 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2372 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2373 for (s = 0; s < dof-cdof; s++) { 2374 if (PetscBTLookupSet(btvt,off+s)) continue; 2375 if (!PetscBTLookup(btv,off+s)) { 2376 ids[cum++] = off+s; 2377 } else { /* cross-vertex */ 2378 pids[cump++] = off+s; 2379 } 2380 } 2381 } 2382 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2383 } 2384 cids[i+1] = cum; 2385 /* mark dofs as already assigned */ 2386 for (j = cids[i]; j < cids[i+1]; j++) { 2387 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2388 } 2389 } 2390 if (cc) { 2391 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2392 for (i = 0; i < graph->ncc; i++) { 2393 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2394 } 2395 *cc = cc_n; 2396 } 2397 if (primalv) { 2398 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2399 } 2400 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2401 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2402 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2403 } 2404 } else { 2405 if (ncc) *ncc = graph->ncc; 2406 if (cc) { 2407 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2408 for (i=0;i<graph->ncc;i++) { 2409 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2410 } 2411 *cc = cc_n; 2412 } 2413 } 2414 /* clean up graph */ 2415 graph->xadj = 0; 2416 graph->adjncy = 0; 2417 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2418 PetscFunctionReturn(0); 2419 } 2420 2421 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2422 { 2423 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2424 PC_IS* pcis = (PC_IS*)(pc->data); 2425 IS dirIS = NULL; 2426 PetscInt i; 2427 PetscErrorCode ierr; 2428 2429 PetscFunctionBegin; 2430 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2431 if (zerodiag) { 2432 Mat A; 2433 Vec vec3_N; 2434 PetscScalar *vals; 2435 const PetscInt *idxs; 2436 PetscInt nz,*count; 2437 2438 /* p0 */ 2439 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2440 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2441 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2442 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2443 for (i=0;i<nz;i++) vals[i] = 1.; 2444 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2445 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2446 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2447 /* v_I */ 2448 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2449 for (i=0;i<nz;i++) vals[i] = 0.; 2450 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2451 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2452 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2453 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2454 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2455 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2456 if (dirIS) { 2457 PetscInt n; 2458 2459 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2460 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2461 for (i=0;i<n;i++) vals[i] = 0.; 2462 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2463 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2464 } 2465 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2466 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2467 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2468 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2469 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2470 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2471 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2472 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2473 ierr = PetscFree(vals);CHKERRQ(ierr); 2474 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2475 2476 /* there should not be any pressure dofs lying on the interface */ 2477 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2478 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2479 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2480 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2481 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2482 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2483 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2484 ierr = PetscFree(count);CHKERRQ(ierr); 2485 } 2486 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2487 2488 /* check PCBDDCBenignGetOrSetP0 */ 2489 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2490 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2491 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2492 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2493 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2494 for (i=0;i<pcbddc->benign_n;i++) { 2495 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2496 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2497 } 2498 PetscFunctionReturn(0); 2499 } 2500 2501 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2502 { 2503 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2504 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2505 PetscInt nz,n; 2506 PetscInt *interior_dofs,n_interior_dofs,nneu; 2507 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2508 PetscErrorCode ierr; 2509 2510 PetscFunctionBegin; 2511 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2512 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2513 for (n=0;n<pcbddc->benign_n;n++) { 2514 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2515 } 2516 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2517 pcbddc->benign_n = 0; 2518 2519 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2520 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2521 Checks if all the pressure dofs in each subdomain have a zero diagonal 2522 If not, a change of basis on pressures is not needed 2523 since the local Schur complements are already SPD 2524 */ 2525 has_null_pressures = PETSC_TRUE; 2526 have_null = PETSC_TRUE; 2527 if (pcbddc->n_ISForDofsLocal) { 2528 IS iP = NULL; 2529 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2530 2531 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2532 ierr = PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2533 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2534 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2535 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2536 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2537 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2538 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2539 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2540 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2541 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2542 if (iP) { 2543 IS newpressures; 2544 2545 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2546 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2547 pressures = newpressures; 2548 } 2549 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2550 if (!sorted) { 2551 ierr = ISSort(pressures);CHKERRQ(ierr); 2552 } 2553 } else { 2554 pressures = NULL; 2555 } 2556 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2557 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2558 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2559 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2560 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2561 if (!sorted) { 2562 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2563 } 2564 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2565 zerodiag_save = zerodiag; 2566 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2567 if (!nz) { 2568 if (n) have_null = PETSC_FALSE; 2569 has_null_pressures = PETSC_FALSE; 2570 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2571 } 2572 recompute_zerodiag = PETSC_FALSE; 2573 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2574 zerodiag_subs = NULL; 2575 pcbddc->benign_n = 0; 2576 n_interior_dofs = 0; 2577 interior_dofs = NULL; 2578 nneu = 0; 2579 if (pcbddc->NeumannBoundariesLocal) { 2580 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2581 } 2582 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2583 if (checkb) { /* need to compute interior nodes */ 2584 PetscInt n,i,j; 2585 PetscInt n_neigh,*neigh,*n_shared,**shared; 2586 PetscInt *iwork; 2587 2588 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2589 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2590 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2591 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2592 for (i=1;i<n_neigh;i++) 2593 for (j=0;j<n_shared[i];j++) 2594 iwork[shared[i][j]] += 1; 2595 for (i=0;i<n;i++) 2596 if (!iwork[i]) 2597 interior_dofs[n_interior_dofs++] = i; 2598 ierr = PetscFree(iwork);CHKERRQ(ierr); 2599 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2600 } 2601 if (has_null_pressures) { 2602 IS *subs; 2603 PetscInt nsubs,i,j,nl; 2604 const PetscInt *idxs; 2605 PetscScalar *array; 2606 Vec *work; 2607 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2608 2609 subs = pcbddc->local_subs; 2610 nsubs = pcbddc->n_local_subs; 2611 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2612 if (checkb) { 2613 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2614 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2615 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2616 /* work[0] = 1_p */ 2617 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2618 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2619 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2620 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2621 /* work[0] = 1_v */ 2622 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2623 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2624 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2625 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2626 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2627 } 2628 if (nsubs > 1) { 2629 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2630 for (i=0;i<nsubs;i++) { 2631 ISLocalToGlobalMapping l2g; 2632 IS t_zerodiag_subs; 2633 PetscInt nl; 2634 2635 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2636 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2637 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2638 if (nl) { 2639 PetscBool valid = PETSC_TRUE; 2640 2641 if (checkb) { 2642 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2643 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2644 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2645 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2646 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2647 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2648 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2649 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2650 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2651 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2652 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2653 for (j=0;j<n_interior_dofs;j++) { 2654 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2655 valid = PETSC_FALSE; 2656 break; 2657 } 2658 } 2659 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2660 } 2661 if (valid && nneu) { 2662 const PetscInt *idxs; 2663 PetscInt nzb; 2664 2665 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2666 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2667 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2668 if (nzb) valid = PETSC_FALSE; 2669 } 2670 if (valid && pressures) { 2671 IS t_pressure_subs; 2672 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2673 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2674 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2675 } 2676 if (valid) { 2677 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2678 pcbddc->benign_n++; 2679 } else { 2680 recompute_zerodiag = PETSC_TRUE; 2681 } 2682 } 2683 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2684 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2685 } 2686 } else { /* there's just one subdomain (or zero if they have not been detected */ 2687 PetscBool valid = PETSC_TRUE; 2688 2689 if (nneu) valid = PETSC_FALSE; 2690 if (valid && pressures) { 2691 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2692 } 2693 if (valid && checkb) { 2694 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2695 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2696 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2697 for (j=0;j<n_interior_dofs;j++) { 2698 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2699 valid = PETSC_FALSE; 2700 break; 2701 } 2702 } 2703 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2704 } 2705 if (valid) { 2706 pcbddc->benign_n = 1; 2707 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2708 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2709 zerodiag_subs[0] = zerodiag; 2710 } 2711 } 2712 if (checkb) { 2713 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2714 } 2715 } 2716 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2717 2718 if (!pcbddc->benign_n) { 2719 PetscInt n; 2720 2721 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2722 recompute_zerodiag = PETSC_FALSE; 2723 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2724 if (n) { 2725 has_null_pressures = PETSC_FALSE; 2726 have_null = PETSC_FALSE; 2727 } 2728 } 2729 2730 /* final check for null pressures */ 2731 if (zerodiag && pressures) { 2732 PetscInt nz,np; 2733 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2734 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2735 if (nz != np) have_null = PETSC_FALSE; 2736 } 2737 2738 if (recompute_zerodiag) { 2739 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2740 if (pcbddc->benign_n == 1) { 2741 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2742 zerodiag = zerodiag_subs[0]; 2743 } else { 2744 PetscInt i,nzn,*new_idxs; 2745 2746 nzn = 0; 2747 for (i=0;i<pcbddc->benign_n;i++) { 2748 PetscInt ns; 2749 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2750 nzn += ns; 2751 } 2752 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2753 nzn = 0; 2754 for (i=0;i<pcbddc->benign_n;i++) { 2755 PetscInt ns,*idxs; 2756 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2757 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2758 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2759 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2760 nzn += ns; 2761 } 2762 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2763 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2764 } 2765 have_null = PETSC_FALSE; 2766 } 2767 2768 /* Prepare matrix to compute no-net-flux */ 2769 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2770 Mat A,loc_divudotp; 2771 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2772 IS row,col,isused = NULL; 2773 PetscInt M,N,n,st,n_isused; 2774 2775 if (pressures) { 2776 isused = pressures; 2777 } else { 2778 isused = zerodiag_save; 2779 } 2780 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2781 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2782 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2783 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2784 n_isused = 0; 2785 if (isused) { 2786 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2787 } 2788 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2789 st = st-n_isused; 2790 if (n) { 2791 const PetscInt *gidxs; 2792 2793 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2794 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2795 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2796 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2797 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2798 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2799 } else { 2800 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2801 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2802 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2803 } 2804 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2805 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2806 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2807 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2808 ierr = ISDestroy(&row);CHKERRQ(ierr); 2809 ierr = ISDestroy(&col);CHKERRQ(ierr); 2810 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2811 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2812 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2813 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2814 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2815 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2816 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2817 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2818 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2819 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2820 } 2821 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2822 2823 /* change of basis and p0 dofs */ 2824 if (has_null_pressures) { 2825 IS zerodiagc; 2826 const PetscInt *idxs,*idxsc; 2827 PetscInt i,s,*nnz; 2828 2829 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2830 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2831 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2832 /* local change of basis for pressures */ 2833 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2834 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2835 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2836 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2837 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2838 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2839 for (i=0;i<pcbddc->benign_n;i++) { 2840 PetscInt nzs,j; 2841 2842 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2843 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2844 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2845 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2846 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2847 } 2848 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2849 ierr = PetscFree(nnz);CHKERRQ(ierr); 2850 /* set identity on velocities */ 2851 for (i=0;i<n-nz;i++) { 2852 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2853 } 2854 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2855 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2856 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2857 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2858 /* set change on pressures */ 2859 for (s=0;s<pcbddc->benign_n;s++) { 2860 PetscScalar *array; 2861 PetscInt nzs; 2862 2863 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2864 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2865 for (i=0;i<nzs-1;i++) { 2866 PetscScalar vals[2]; 2867 PetscInt cols[2]; 2868 2869 cols[0] = idxs[i]; 2870 cols[1] = idxs[nzs-1]; 2871 vals[0] = 1.; 2872 vals[1] = 1.; 2873 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2874 } 2875 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2876 for (i=0;i<nzs-1;i++) array[i] = -1.; 2877 array[nzs-1] = 1.; 2878 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2879 /* store local idxs for p0 */ 2880 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2881 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2882 ierr = PetscFree(array);CHKERRQ(ierr); 2883 } 2884 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2885 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2886 /* project if needed */ 2887 if (pcbddc->benign_change_explicit) { 2888 Mat M; 2889 2890 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2891 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2892 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2893 ierr = MatDestroy(&M);CHKERRQ(ierr); 2894 } 2895 /* store global idxs for p0 */ 2896 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2897 } 2898 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2899 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2900 2901 /* determines if the coarse solver will be singular or not */ 2902 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2903 /* determines if the problem has subdomains with 0 pressure block */ 2904 have_null = (PetscBool)(!!pcbddc->benign_n); 2905 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2906 *zerodiaglocal = zerodiag; 2907 PetscFunctionReturn(0); 2908 } 2909 2910 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2911 { 2912 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2913 PetscScalar *array; 2914 PetscErrorCode ierr; 2915 2916 PetscFunctionBegin; 2917 if (!pcbddc->benign_sf) { 2918 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2919 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2920 } 2921 if (get) { 2922 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2923 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2924 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2925 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2926 } else { 2927 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2928 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2929 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2930 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2931 } 2932 PetscFunctionReturn(0); 2933 } 2934 2935 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2936 { 2937 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2938 PetscErrorCode ierr; 2939 2940 PetscFunctionBegin; 2941 /* TODO: add error checking 2942 - avoid nested pop (or push) calls. 2943 - cannot push before pop. 2944 - cannot call this if pcbddc->local_mat is NULL 2945 */ 2946 if (!pcbddc->benign_n) { 2947 PetscFunctionReturn(0); 2948 } 2949 if (pop) { 2950 if (pcbddc->benign_change_explicit) { 2951 IS is_p0; 2952 MatReuse reuse; 2953 2954 /* extract B_0 */ 2955 reuse = MAT_INITIAL_MATRIX; 2956 if (pcbddc->benign_B0) { 2957 reuse = MAT_REUSE_MATRIX; 2958 } 2959 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2960 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2961 /* remove rows and cols from local problem */ 2962 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2963 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2964 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2965 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2966 } else { 2967 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2968 PetscScalar *vals; 2969 PetscInt i,n,*idxs_ins; 2970 2971 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2972 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2973 if (!pcbddc->benign_B0) { 2974 PetscInt *nnz; 2975 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2976 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2977 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2978 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2979 for (i=0;i<pcbddc->benign_n;i++) { 2980 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2981 nnz[i] = n - nnz[i]; 2982 } 2983 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2984 ierr = PetscFree(nnz);CHKERRQ(ierr); 2985 } 2986 2987 for (i=0;i<pcbddc->benign_n;i++) { 2988 PetscScalar *array; 2989 PetscInt *idxs,j,nz,cum; 2990 2991 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2992 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2993 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2994 for (j=0;j<nz;j++) vals[j] = 1.; 2995 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2996 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2997 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2998 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2999 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 3000 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 3001 cum = 0; 3002 for (j=0;j<n;j++) { 3003 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3004 vals[cum] = array[j]; 3005 idxs_ins[cum] = j; 3006 cum++; 3007 } 3008 } 3009 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 3010 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 3011 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3012 } 3013 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3014 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3015 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 3016 } 3017 } else { /* push */ 3018 if (pcbddc->benign_change_explicit) { 3019 PetscInt i; 3020 3021 for (i=0;i<pcbddc->benign_n;i++) { 3022 PetscScalar *B0_vals; 3023 PetscInt *B0_cols,B0_ncol; 3024 3025 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3026 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3027 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3028 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 3029 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3030 } 3031 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3032 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3033 } else { 3034 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 3035 } 3036 } 3037 PetscFunctionReturn(0); 3038 } 3039 3040 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3041 { 3042 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3043 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3044 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3045 PetscBLASInt *B_iwork,*B_ifail; 3046 PetscScalar *work,lwork; 3047 PetscScalar *St,*S,*eigv; 3048 PetscScalar *Sarray,*Starray; 3049 PetscReal *eigs,thresh,lthresh,uthresh; 3050 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3051 PetscBool allocated_S_St; 3052 #if defined(PETSC_USE_COMPLEX) 3053 PetscReal *rwork; 3054 #endif 3055 PetscErrorCode ierr; 3056 3057 PetscFunctionBegin; 3058 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3059 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3060 if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3061 3062 if (pcbddc->dbg_flag) { 3063 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3064 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3065 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3066 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3067 } 3068 3069 if (pcbddc->dbg_flag) { 3070 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 3071 } 3072 3073 /* max size of subsets */ 3074 mss = 0; 3075 for (i=0;i<sub_schurs->n_subs;i++) { 3076 PetscInt subset_size; 3077 3078 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3079 mss = PetscMax(mss,subset_size); 3080 } 3081 3082 /* min/max and threshold */ 3083 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3084 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3085 nmax = PetscMax(nmin,nmax); 3086 allocated_S_St = PETSC_FALSE; 3087 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3088 allocated_S_St = PETSC_TRUE; 3089 } 3090 3091 /* allocate lapack workspace */ 3092 cum = cum2 = 0; 3093 maxneigs = 0; 3094 for (i=0;i<sub_schurs->n_subs;i++) { 3095 PetscInt n,subset_size; 3096 3097 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3098 n = PetscMin(subset_size,nmax); 3099 cum += subset_size; 3100 cum2 += subset_size*n; 3101 maxneigs = PetscMax(maxneigs,n); 3102 } 3103 if (mss) { 3104 if (sub_schurs->is_symmetric) { 3105 PetscBLASInt B_itype = 1; 3106 PetscBLASInt B_N = mss; 3107 PetscReal zero = 0.0; 3108 PetscReal eps = 0.0; /* dlamch? */ 3109 3110 B_lwork = -1; 3111 S = NULL; 3112 St = NULL; 3113 eigs = NULL; 3114 eigv = NULL; 3115 B_iwork = NULL; 3116 B_ifail = NULL; 3117 #if defined(PETSC_USE_COMPLEX) 3118 rwork = NULL; 3119 #endif 3120 thresh = 1.0; 3121 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3122 #if defined(PETSC_USE_COMPLEX) 3123 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3124 #else 3125 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3126 #endif 3127 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3128 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3129 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3130 } else { 3131 lwork = 0; 3132 } 3133 3134 nv = 0; 3135 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3136 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3137 } 3138 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3139 if (allocated_S_St) { 3140 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3141 } 3142 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3143 #if defined(PETSC_USE_COMPLEX) 3144 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3145 #endif 3146 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3147 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3148 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3149 nv+cum,&pcbddc->adaptive_constraints_idxs, 3150 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3151 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 3152 3153 maxneigs = 0; 3154 cum = cumarray = 0; 3155 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3156 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3157 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3158 const PetscInt *idxs; 3159 3160 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3161 for (cum=0;cum<nv;cum++) { 3162 pcbddc->adaptive_constraints_n[cum] = 1; 3163 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3164 pcbddc->adaptive_constraints_data[cum] = 1.0; 3165 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3166 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3167 } 3168 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3169 } 3170 3171 if (mss) { /* multilevel */ 3172 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3173 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3174 } 3175 3176 lthresh = pcbddc->adaptive_threshold[0]; 3177 uthresh = pcbddc->adaptive_threshold[1]; 3178 for (i=0;i<sub_schurs->n_subs;i++) { 3179 const PetscInt *idxs; 3180 PetscReal upper,lower; 3181 PetscInt j,subset_size,eigs_start = 0; 3182 PetscBLASInt B_N; 3183 PetscBool same_data = PETSC_FALSE; 3184 PetscBool scal = PETSC_FALSE; 3185 3186 if (pcbddc->use_deluxe_scaling) { 3187 upper = PETSC_MAX_REAL; 3188 lower = uthresh; 3189 } else { 3190 if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3191 upper = 1./uthresh; 3192 lower = 0.; 3193 } 3194 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3195 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3196 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3197 /* this is experimental: we assume the dofs have been properly grouped to have 3198 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3199 if (!sub_schurs->is_posdef) { 3200 Mat T; 3201 3202 for (j=0;j<subset_size;j++) { 3203 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3204 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);CHKERRQ(ierr); 3205 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3206 ierr = MatDestroy(&T);CHKERRQ(ierr); 3207 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);CHKERRQ(ierr); 3208 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3209 ierr = MatDestroy(&T);CHKERRQ(ierr); 3210 if (sub_schurs->change_primal_sub) { 3211 PetscInt nz,k; 3212 const PetscInt *idxs; 3213 3214 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);CHKERRQ(ierr); 3215 ierr = ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3216 for (k=0;k<nz;k++) { 3217 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3218 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3219 } 3220 ierr = ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3221 } 3222 scal = PETSC_TRUE; 3223 break; 3224 } 3225 } 3226 } 3227 3228 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3229 if (sub_schurs->is_symmetric) { 3230 PetscInt j,k; 3231 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 3232 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3233 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3234 } 3235 for (j=0;j<subset_size;j++) { 3236 for (k=j;k<subset_size;k++) { 3237 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3238 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3239 } 3240 } 3241 } else { 3242 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3243 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3244 } 3245 } else { 3246 S = Sarray + cumarray; 3247 St = Starray + cumarray; 3248 } 3249 /* see if we can save some work */ 3250 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3251 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 3252 } 3253 3254 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3255 B_neigs = 0; 3256 } else { 3257 if (sub_schurs->is_symmetric) { 3258 PetscBLASInt B_itype = 1; 3259 PetscBLASInt B_IL, B_IU; 3260 PetscReal eps = -1.0; /* dlamch? */ 3261 PetscInt nmin_s; 3262 PetscBool compute_range; 3263 3264 B_neigs = 0; 3265 compute_range = (PetscBool)!same_data; 3266 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3267 3268 if (pcbddc->dbg_flag) { 3269 PetscInt nc = 0; 3270 3271 if (sub_schurs->change_primal_sub) { 3272 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);CHKERRQ(ierr); 3273 } 3274 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d (range %d) (change %d).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);CHKERRQ(ierr); 3275 } 3276 3277 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3278 if (compute_range) { 3279 3280 /* ask for eigenvalues larger than thresh */ 3281 if (sub_schurs->is_posdef) { 3282 #if defined(PETSC_USE_COMPLEX) 3283 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3284 #else 3285 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3286 #endif 3287 } else { /* no theory so far, but it works nicely */ 3288 PetscInt recipe = 0,recipe_m = 1; 3289 PetscReal bb[2]; 3290 3291 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);CHKERRQ(ierr); 3292 switch (recipe) { 3293 case 0: 3294 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3295 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3296 #if defined(PETSC_USE_COMPLEX) 3297 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3298 #else 3299 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3300 #endif 3301 break; 3302 case 1: 3303 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3304 #if defined(PETSC_USE_COMPLEX) 3305 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3306 #else 3307 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3308 #endif 3309 if (!scal) { 3310 PetscBLASInt B_neigs2 = 0; 3311 3312 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3313 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3314 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3315 #if defined(PETSC_USE_COMPLEX) 3316 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3317 #else 3318 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3319 #endif 3320 B_neigs += B_neigs2; 3321 } 3322 break; 3323 case 2: 3324 if (scal) { 3325 bb[0] = PETSC_MIN_REAL; 3326 bb[1] = 0; 3327 #if defined(PETSC_USE_COMPLEX) 3328 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3329 #else 3330 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3331 #endif 3332 } else { 3333 PetscBLASInt B_neigs2 = 0; 3334 PetscBool import = PETSC_FALSE; 3335 3336 lthresh = PetscMax(lthresh,0.0); 3337 if (lthresh > 0.0) { 3338 bb[0] = PETSC_MIN_REAL; 3339 bb[1] = lthresh*lthresh; 3340 3341 import = PETSC_TRUE; 3342 #if defined(PETSC_USE_COMPLEX) 3343 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3344 #else 3345 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3346 #endif 3347 } 3348 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3349 bb[1] = PETSC_MAX_REAL; 3350 if (import) { 3351 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3352 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3353 } 3354 #if defined(PETSC_USE_COMPLEX) 3355 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3356 #else 3357 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3358 #endif 3359 B_neigs += B_neigs2; 3360 } 3361 break; 3362 case 3: 3363 if (scal) { 3364 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);CHKERRQ(ierr); 3365 } else { 3366 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);CHKERRQ(ierr); 3367 } 3368 if (!scal) { 3369 bb[0] = uthresh; 3370 bb[1] = PETSC_MAX_REAL; 3371 #if defined(PETSC_USE_COMPLEX) 3372 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3373 #else 3374 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3375 #endif 3376 } 3377 if (recipe_m > 0 && B_N - B_neigs > 0) { 3378 PetscBLASInt B_neigs2 = 0; 3379 3380 B_IL = 1; 3381 ierr = PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);CHKERRQ(ierr); 3382 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3383 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3384 #if defined(PETSC_USE_COMPLEX) 3385 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3386 #else 3387 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3388 #endif 3389 B_neigs += B_neigs2; 3390 } 3391 break; 3392 case 4: 3393 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3394 #if defined(PETSC_USE_COMPLEX) 3395 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3396 #else 3397 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3398 #endif 3399 { 3400 PetscBLASInt B_neigs2 = 0; 3401 3402 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3403 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3404 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3405 #if defined(PETSC_USE_COMPLEX) 3406 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3407 #else 3408 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3409 #endif 3410 B_neigs += B_neigs2; 3411 } 3412 break; 3413 case 5: /* same as before: first compute all eigenvalues, then filter */ 3414 #if defined(PETSC_USE_COMPLEX) 3415 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3416 #else 3417 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3418 #endif 3419 { 3420 PetscInt e,k,ne; 3421 for (e=0,ne=0;e<B_neigs;e++) { 3422 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3423 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3424 eigs[ne] = eigs[e]; 3425 ne++; 3426 } 3427 } 3428 ierr = PetscMemcpy(eigv,S,B_N*ne*sizeof(PetscScalar));CHKERRQ(ierr); 3429 B_neigs = ne; 3430 } 3431 break; 3432 default: 3433 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3434 break; 3435 } 3436 } 3437 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3438 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3439 B_IL = 1; 3440 #if defined(PETSC_USE_COMPLEX) 3441 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3442 #else 3443 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3444 #endif 3445 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3446 PetscInt k; 3447 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3448 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3449 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3450 nmin = nmax; 3451 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3452 for (k=0;k<nmax;k++) { 3453 eigs[k] = 1./PETSC_SMALL; 3454 eigv[k*(subset_size+1)] = 1.0; 3455 } 3456 } 3457 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3458 if (B_ierr) { 3459 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3460 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3461 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3462 } 3463 3464 if (B_neigs > nmax) { 3465 if (pcbddc->dbg_flag) { 3466 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax);CHKERRQ(ierr); 3467 } 3468 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3469 B_neigs = nmax; 3470 } 3471 3472 nmin_s = PetscMin(nmin,B_N); 3473 if (B_neigs < nmin_s) { 3474 PetscBLASInt B_neigs2 = 0; 3475 3476 if (pcbddc->use_deluxe_scaling) { 3477 if (scal) { 3478 B_IU = nmin_s; 3479 B_IL = B_neigs + 1; 3480 } else { 3481 B_IL = B_N - nmin_s + 1; 3482 B_IU = B_N - B_neigs; 3483 } 3484 } else { 3485 B_IL = B_neigs + 1; 3486 B_IU = nmin_s; 3487 } 3488 if (pcbddc->dbg_flag) { 3489 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3490 } 3491 if (sub_schurs->is_symmetric) { 3492 PetscInt j,k; 3493 for (j=0;j<subset_size;j++) { 3494 for (k=j;k<subset_size;k++) { 3495 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3496 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3497 } 3498 } 3499 } else { 3500 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3501 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3502 } 3503 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3504 #if defined(PETSC_USE_COMPLEX) 3505 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3506 #else 3507 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3508 #endif 3509 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3510 B_neigs += B_neigs2; 3511 } 3512 if (B_ierr) { 3513 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3514 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3515 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3516 } 3517 if (pcbddc->dbg_flag) { 3518 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3519 for (j=0;j<B_neigs;j++) { 3520 if (eigs[j] == 0.0) { 3521 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3522 } else { 3523 if (pcbddc->use_deluxe_scaling) { 3524 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3525 } else { 3526 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3527 } 3528 } 3529 } 3530 } 3531 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3532 } 3533 /* change the basis back to the original one */ 3534 if (sub_schurs->change) { 3535 Mat change,phi,phit; 3536 3537 if (pcbddc->dbg_flag > 2) { 3538 PetscInt ii; 3539 for (ii=0;ii<B_neigs;ii++) { 3540 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3541 for (j=0;j<B_N;j++) { 3542 #if defined(PETSC_USE_COMPLEX) 3543 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3544 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3545 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3546 #else 3547 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3548 #endif 3549 } 3550 } 3551 } 3552 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3553 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3554 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3555 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3556 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3557 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3558 } 3559 maxneigs = PetscMax(B_neigs,maxneigs); 3560 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3561 if (B_neigs) { 3562 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3563 3564 if (pcbddc->dbg_flag > 1) { 3565 PetscInt ii; 3566 for (ii=0;ii<B_neigs;ii++) { 3567 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3568 for (j=0;j<B_N;j++) { 3569 #if defined(PETSC_USE_COMPLEX) 3570 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3571 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3572 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3573 #else 3574 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3575 #endif 3576 } 3577 } 3578 } 3579 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3580 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3581 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3582 cum++; 3583 } 3584 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3585 /* shift for next computation */ 3586 cumarray += subset_size*subset_size; 3587 } 3588 if (pcbddc->dbg_flag) { 3589 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3590 } 3591 3592 if (mss) { 3593 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3594 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3595 /* destroy matrices (junk) */ 3596 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3597 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3598 } 3599 if (allocated_S_St) { 3600 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3601 } 3602 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3603 #if defined(PETSC_USE_COMPLEX) 3604 ierr = PetscFree(rwork);CHKERRQ(ierr); 3605 #endif 3606 if (pcbddc->dbg_flag) { 3607 PetscInt maxneigs_r; 3608 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3609 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3610 } 3611 PetscFunctionReturn(0); 3612 } 3613 3614 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3615 { 3616 PetscScalar *coarse_submat_vals; 3617 PetscErrorCode ierr; 3618 3619 PetscFunctionBegin; 3620 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3621 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3622 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3623 3624 /* Setup local neumann solver ksp_R */ 3625 /* PCBDDCSetUpLocalScatters should be called first! */ 3626 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3627 3628 /* 3629 Setup local correction and local part of coarse basis. 3630 Gives back the dense local part of the coarse matrix in column major ordering 3631 */ 3632 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3633 3634 /* Compute total number of coarse nodes and setup coarse solver */ 3635 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3636 3637 /* free */ 3638 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3639 PetscFunctionReturn(0); 3640 } 3641 3642 PetscErrorCode PCBDDCResetCustomization(PC pc) 3643 { 3644 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3645 PetscErrorCode ierr; 3646 3647 PetscFunctionBegin; 3648 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3649 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3650 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3651 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3652 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3653 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3654 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3655 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3656 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3657 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3658 PetscFunctionReturn(0); 3659 } 3660 3661 PetscErrorCode PCBDDCResetTopography(PC pc) 3662 { 3663 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3664 PetscInt i; 3665 PetscErrorCode ierr; 3666 3667 PetscFunctionBegin; 3668 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3669 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3670 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3671 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3672 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3673 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3674 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3675 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3676 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3677 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3678 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3679 for (i=0;i<pcbddc->n_local_subs;i++) { 3680 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3681 } 3682 pcbddc->n_local_subs = 0; 3683 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3684 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3685 pcbddc->graphanalyzed = PETSC_FALSE; 3686 pcbddc->recompute_topography = PETSC_TRUE; 3687 pcbddc->corner_selected = PETSC_FALSE; 3688 PetscFunctionReturn(0); 3689 } 3690 3691 PetscErrorCode PCBDDCResetSolvers(PC pc) 3692 { 3693 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3694 PetscErrorCode ierr; 3695 3696 PetscFunctionBegin; 3697 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3698 if (pcbddc->coarse_phi_B) { 3699 PetscScalar *array; 3700 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3701 ierr = PetscFree(array);CHKERRQ(ierr); 3702 } 3703 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3704 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3705 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3706 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3707 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3708 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3709 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3710 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3711 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3712 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3713 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3714 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3715 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3716 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3717 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3718 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3719 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3720 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3721 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3722 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3723 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3724 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3725 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3726 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3727 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3728 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3729 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3730 if (pcbddc->benign_zerodiag_subs) { 3731 PetscInt i; 3732 for (i=0;i<pcbddc->benign_n;i++) { 3733 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3734 } 3735 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3736 } 3737 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3738 PetscFunctionReturn(0); 3739 } 3740 3741 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3742 { 3743 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3744 PC_IS *pcis = (PC_IS*)pc->data; 3745 VecType impVecType; 3746 PetscInt n_constraints,n_R,old_size; 3747 PetscErrorCode ierr; 3748 3749 PetscFunctionBegin; 3750 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3751 n_R = pcis->n - pcbddc->n_vertices; 3752 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3753 /* local work vectors (try to avoid unneeded work)*/ 3754 /* R nodes */ 3755 old_size = -1; 3756 if (pcbddc->vec1_R) { 3757 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3758 } 3759 if (n_R != old_size) { 3760 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3761 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3762 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3763 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3764 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3765 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3766 } 3767 /* local primal dofs */ 3768 old_size = -1; 3769 if (pcbddc->vec1_P) { 3770 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3771 } 3772 if (pcbddc->local_primal_size != old_size) { 3773 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3774 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3775 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3776 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3777 } 3778 /* local explicit constraints */ 3779 old_size = -1; 3780 if (pcbddc->vec1_C) { 3781 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3782 } 3783 if (n_constraints && n_constraints != old_size) { 3784 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3785 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3786 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3787 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3788 } 3789 PetscFunctionReturn(0); 3790 } 3791 3792 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3793 { 3794 PetscErrorCode ierr; 3795 /* pointers to pcis and pcbddc */ 3796 PC_IS* pcis = (PC_IS*)pc->data; 3797 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3798 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3799 /* submatrices of local problem */ 3800 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3801 /* submatrices of local coarse problem */ 3802 Mat S_VV,S_CV,S_VC,S_CC; 3803 /* working matrices */ 3804 Mat C_CR; 3805 /* additional working stuff */ 3806 PC pc_R; 3807 Mat F,Brhs = NULL; 3808 Vec dummy_vec; 3809 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3810 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3811 PetscScalar *work; 3812 PetscInt *idx_V_B; 3813 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3814 PetscInt i,n_R,n_D,n_B; 3815 3816 /* some shortcuts to scalars */ 3817 PetscScalar one=1.0,m_one=-1.0; 3818 3819 PetscFunctionBegin; 3820 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3821 3822 /* Set Non-overlapping dimensions */ 3823 n_vertices = pcbddc->n_vertices; 3824 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3825 n_B = pcis->n_B; 3826 n_D = pcis->n - n_B; 3827 n_R = pcis->n - n_vertices; 3828 3829 /* vertices in boundary numbering */ 3830 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3831 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3832 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3833 3834 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3835 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3836 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3837 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3838 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3839 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3840 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3841 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3842 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3843 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3844 3845 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3846 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3847 ierr = PCSetUp(pc_R);CHKERRQ(ierr); 3848 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3849 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3850 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3851 lda_rhs = n_R; 3852 need_benign_correction = PETSC_FALSE; 3853 if (isLU || isILU || isCHOL) { 3854 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3855 } else if (sub_schurs && sub_schurs->reuse_solver) { 3856 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3857 MatFactorType type; 3858 3859 F = reuse_solver->F; 3860 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3861 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3862 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3863 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3864 } else { 3865 F = NULL; 3866 } 3867 3868 /* determine if we can use a sparse right-hand side */ 3869 sparserhs = PETSC_FALSE; 3870 if (F) { 3871 MatSolverType solver; 3872 3873 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3874 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3875 } 3876 3877 /* allocate workspace */ 3878 n = 0; 3879 if (n_constraints) { 3880 n += lda_rhs*n_constraints; 3881 } 3882 if (n_vertices) { 3883 n = PetscMax(2*lda_rhs*n_vertices,n); 3884 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3885 } 3886 if (!pcbddc->symmetric_primal) { 3887 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3888 } 3889 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3890 3891 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3892 dummy_vec = NULL; 3893 if (need_benign_correction && lda_rhs != n_R && F) { 3894 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3895 } 3896 3897 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3898 if (n_constraints) { 3899 Mat M3,C_B; 3900 IS is_aux; 3901 PetscScalar *array,*array2; 3902 3903 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3904 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3905 3906 /* Extract constraints on R nodes: C_{CR} */ 3907 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3908 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3909 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3910 3911 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3912 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3913 if (!sparserhs) { 3914 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3915 for (i=0;i<n_constraints;i++) { 3916 const PetscScalar *row_cmat_values; 3917 const PetscInt *row_cmat_indices; 3918 PetscInt size_of_constraint,j; 3919 3920 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3921 for (j=0;j<size_of_constraint;j++) { 3922 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3923 } 3924 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3925 } 3926 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3927 } else { 3928 Mat tC_CR; 3929 3930 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3931 if (lda_rhs != n_R) { 3932 PetscScalar *aa; 3933 PetscInt r,*ii,*jj; 3934 PetscBool done; 3935 3936 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3937 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 3938 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3939 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3940 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3941 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 3942 } else { 3943 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3944 tC_CR = C_CR; 3945 } 3946 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3947 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3948 } 3949 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3950 if (F) { 3951 if (need_benign_correction) { 3952 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3953 3954 /* rhs is already zero on interior dofs, no need to change the rhs */ 3955 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3956 } 3957 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3958 if (need_benign_correction) { 3959 PetscScalar *marr; 3960 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3961 3962 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3963 if (lda_rhs != n_R) { 3964 for (i=0;i<n_constraints;i++) { 3965 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3966 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3967 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3968 } 3969 } else { 3970 for (i=0;i<n_constraints;i++) { 3971 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3972 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3973 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3974 } 3975 } 3976 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3977 } 3978 } else { 3979 PetscScalar *marr; 3980 3981 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3982 for (i=0;i<n_constraints;i++) { 3983 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3984 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3985 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3986 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3987 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3988 } 3989 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3990 } 3991 if (sparserhs) { 3992 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3993 } 3994 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3995 if (!pcbddc->switch_static) { 3996 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3997 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3998 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3999 for (i=0;i<n_constraints;i++) { 4000 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 4001 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 4002 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4003 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4004 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4005 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4006 } 4007 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 4008 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 4009 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4010 } else { 4011 if (lda_rhs != n_R) { 4012 IS dummy; 4013 4014 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 4015 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4016 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 4017 } else { 4018 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 4019 pcbddc->local_auxmat2 = local_auxmat2_R; 4020 } 4021 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4022 } 4023 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4024 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 4025 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 4026 if (isCHOL) { 4027 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 4028 } else { 4029 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 4030 } 4031 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 4032 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4033 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4034 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4035 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4036 ierr = MatDestroy(&M3);CHKERRQ(ierr); 4037 } 4038 4039 /* Get submatrices from subdomain matrix */ 4040 if (n_vertices) { 4041 IS is_aux; 4042 PetscBool isseqaij; 4043 4044 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4045 IS tis; 4046 4047 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 4048 ierr = ISSort(tis);CHKERRQ(ierr); 4049 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 4050 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4051 } else { 4052 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 4053 } 4054 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 4055 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 4056 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4057 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 4058 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4059 } 4060 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 4061 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4062 } 4063 4064 /* Matrix of coarse basis functions (local) */ 4065 if (pcbddc->coarse_phi_B) { 4066 PetscInt on_B,on_primal,on_D=n_D; 4067 if (pcbddc->coarse_phi_D) { 4068 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 4069 } 4070 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 4071 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4072 PetscScalar *marray; 4073 4074 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 4075 ierr = PetscFree(marray);CHKERRQ(ierr); 4076 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4077 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4078 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4079 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4080 } 4081 } 4082 4083 if (!pcbddc->coarse_phi_B) { 4084 PetscScalar *marr; 4085 4086 /* memory size */ 4087 n = n_B*pcbddc->local_primal_size; 4088 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4089 if (!pcbddc->symmetric_primal) n *= 2; 4090 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 4091 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4092 marr += n_B*pcbddc->local_primal_size; 4093 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4094 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4095 marr += n_D*pcbddc->local_primal_size; 4096 } 4097 if (!pcbddc->symmetric_primal) { 4098 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4099 marr += n_B*pcbddc->local_primal_size; 4100 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4101 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4102 } 4103 } else { 4104 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 4105 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4106 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4107 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 4108 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4109 } 4110 } 4111 } 4112 4113 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4114 p0_lidx_I = NULL; 4115 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4116 const PetscInt *idxs; 4117 4118 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4119 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 4120 for (i=0;i<pcbddc->benign_n;i++) { 4121 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 4122 } 4123 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4124 } 4125 4126 /* vertices */ 4127 if (n_vertices) { 4128 PetscBool restoreavr = PETSC_FALSE; 4129 4130 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 4131 4132 if (n_R) { 4133 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4134 PetscBLASInt B_N,B_one = 1; 4135 PetscScalar *x,*y; 4136 4137 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 4138 if (need_benign_correction) { 4139 ISLocalToGlobalMapping RtoN; 4140 IS is_p0; 4141 PetscInt *idxs_p0,n; 4142 4143 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 4144 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 4145 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 4146 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 4147 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 4148 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 4149 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 4150 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 4151 } 4152 4153 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4154 if (!sparserhs || need_benign_correction) { 4155 if (lda_rhs == n_R) { 4156 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4157 } else { 4158 PetscScalar *av,*array; 4159 const PetscInt *xadj,*adjncy; 4160 PetscInt n; 4161 PetscBool flg_row; 4162 4163 array = work+lda_rhs*n_vertices; 4164 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4165 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4166 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4167 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 4168 for (i=0;i<n;i++) { 4169 PetscInt j; 4170 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4171 } 4172 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4173 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4174 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 4175 } 4176 if (need_benign_correction) { 4177 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4178 PetscScalar *marr; 4179 4180 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 4181 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4182 4183 | 0 0 0 | (V) 4184 L = | 0 0 -1 | (P-p0) 4185 | 0 0 -1 | (p0) 4186 4187 */ 4188 for (i=0;i<reuse_solver->benign_n;i++) { 4189 const PetscScalar *vals; 4190 const PetscInt *idxs,*idxs_zero; 4191 PetscInt n,j,nz; 4192 4193 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4194 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4195 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4196 for (j=0;j<n;j++) { 4197 PetscScalar val = vals[j]; 4198 PetscInt k,col = idxs[j]; 4199 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4200 } 4201 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4202 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4203 } 4204 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 4205 } 4206 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 4207 Brhs = A_RV; 4208 } else { 4209 Mat tA_RVT,A_RVT; 4210 4211 if (!pcbddc->symmetric_primal) { 4212 /* A_RV already scaled by -1 */ 4213 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 4214 } else { 4215 restoreavr = PETSC_TRUE; 4216 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4217 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 4218 A_RVT = A_VR; 4219 } 4220 if (lda_rhs != n_R) { 4221 PetscScalar *aa; 4222 PetscInt r,*ii,*jj; 4223 PetscBool done; 4224 4225 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4226 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4227 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4228 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4229 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4230 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4231 } else { 4232 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4233 tA_RVT = A_RVT; 4234 } 4235 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4236 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4237 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4238 } 4239 if (F) { 4240 /* need to correct the rhs */ 4241 if (need_benign_correction) { 4242 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4243 PetscScalar *marr; 4244 4245 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4246 if (lda_rhs != n_R) { 4247 for (i=0;i<n_vertices;i++) { 4248 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4249 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4250 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4251 } 4252 } else { 4253 for (i=0;i<n_vertices;i++) { 4254 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4255 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4256 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4257 } 4258 } 4259 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4260 } 4261 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4262 if (restoreavr) { 4263 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4264 } 4265 /* need to correct the solution */ 4266 if (need_benign_correction) { 4267 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4268 PetscScalar *marr; 4269 4270 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4271 if (lda_rhs != n_R) { 4272 for (i=0;i<n_vertices;i++) { 4273 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4274 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4275 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4276 } 4277 } else { 4278 for (i=0;i<n_vertices;i++) { 4279 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4280 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4281 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4282 } 4283 } 4284 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4285 } 4286 } else { 4287 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4288 for (i=0;i<n_vertices;i++) { 4289 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4290 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4291 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4292 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4293 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4294 } 4295 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4296 } 4297 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4298 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4299 /* S_VV and S_CV */ 4300 if (n_constraints) { 4301 Mat B; 4302 4303 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4304 for (i=0;i<n_vertices;i++) { 4305 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4306 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4307 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4308 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4309 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4310 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4311 } 4312 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4313 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 4314 ierr = MatDestroy(&B);CHKERRQ(ierr); 4315 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4316 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4317 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4318 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4319 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4320 ierr = MatDestroy(&B);CHKERRQ(ierr); 4321 } 4322 if (lda_rhs != n_R) { 4323 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4324 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4325 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4326 } 4327 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4328 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4329 if (need_benign_correction) { 4330 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4331 PetscScalar *marr,*sums; 4332 4333 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4334 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4335 for (i=0;i<reuse_solver->benign_n;i++) { 4336 const PetscScalar *vals; 4337 const PetscInt *idxs,*idxs_zero; 4338 PetscInt n,j,nz; 4339 4340 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4341 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4342 for (j=0;j<n_vertices;j++) { 4343 PetscInt k; 4344 sums[j] = 0.; 4345 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4346 } 4347 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4348 for (j=0;j<n;j++) { 4349 PetscScalar val = vals[j]; 4350 PetscInt k; 4351 for (k=0;k<n_vertices;k++) { 4352 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4353 } 4354 } 4355 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4356 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4357 } 4358 ierr = PetscFree(sums);CHKERRQ(ierr); 4359 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4360 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4361 } 4362 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4363 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4364 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 4365 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4366 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4367 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 4368 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4369 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4370 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4371 } else { 4372 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4373 } 4374 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4375 4376 /* coarse basis functions */ 4377 for (i=0;i<n_vertices;i++) { 4378 PetscScalar *y; 4379 4380 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4381 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4382 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4383 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4384 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4385 y[n_B*i+idx_V_B[i]] = 1.0; 4386 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4387 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4388 4389 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4390 PetscInt j; 4391 4392 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4393 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4394 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4395 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4396 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4397 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4398 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4399 } 4400 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4401 } 4402 /* if n_R == 0 the object is not destroyed */ 4403 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4404 } 4405 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4406 4407 if (n_constraints) { 4408 Mat B; 4409 4410 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4411 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4412 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4413 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4414 if (n_vertices) { 4415 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4416 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4417 } else { 4418 Mat S_VCt; 4419 4420 if (lda_rhs != n_R) { 4421 ierr = MatDestroy(&B);CHKERRQ(ierr); 4422 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4423 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4424 } 4425 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4426 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4427 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4428 } 4429 } 4430 ierr = MatDestroy(&B);CHKERRQ(ierr); 4431 /* coarse basis functions */ 4432 for (i=0;i<n_constraints;i++) { 4433 PetscScalar *y; 4434 4435 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4436 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4437 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4438 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4439 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4440 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4441 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4442 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4443 PetscInt j; 4444 4445 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4446 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4447 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4448 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4449 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4450 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4451 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4452 } 4453 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4454 } 4455 } 4456 if (n_constraints) { 4457 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4458 } 4459 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4460 4461 /* coarse matrix entries relative to B_0 */ 4462 if (pcbddc->benign_n) { 4463 Mat B0_B,B0_BPHI; 4464 IS is_dummy; 4465 PetscScalar *data; 4466 PetscInt j; 4467 4468 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4469 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4470 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4471 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4472 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4473 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4474 for (j=0;j<pcbddc->benign_n;j++) { 4475 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4476 for (i=0;i<pcbddc->local_primal_size;i++) { 4477 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4478 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4479 } 4480 } 4481 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4482 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4483 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4484 } 4485 4486 /* compute other basis functions for non-symmetric problems */ 4487 if (!pcbddc->symmetric_primal) { 4488 Mat B_V=NULL,B_C=NULL; 4489 PetscScalar *marray; 4490 4491 if (n_constraints) { 4492 Mat S_CCT,C_CRT; 4493 4494 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4495 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4496 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4497 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4498 if (n_vertices) { 4499 Mat S_VCT; 4500 4501 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4502 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4503 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4504 } 4505 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4506 } else { 4507 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4508 } 4509 if (n_vertices && n_R) { 4510 PetscScalar *av,*marray; 4511 const PetscInt *xadj,*adjncy; 4512 PetscInt n; 4513 PetscBool flg_row; 4514 4515 /* B_V = B_V - A_VR^T */ 4516 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4517 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4518 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4519 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4520 for (i=0;i<n;i++) { 4521 PetscInt j; 4522 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4523 } 4524 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4525 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4526 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4527 } 4528 4529 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4530 if (n_vertices) { 4531 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4532 for (i=0;i<n_vertices;i++) { 4533 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4534 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4535 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4536 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4537 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4538 } 4539 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4540 } 4541 if (B_C) { 4542 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4543 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4544 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4545 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4546 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4547 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4548 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4549 } 4550 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4551 } 4552 /* coarse basis functions */ 4553 for (i=0;i<pcbddc->local_primal_size;i++) { 4554 PetscScalar *y; 4555 4556 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4557 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4558 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4559 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4560 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4561 if (i<n_vertices) { 4562 y[n_B*i+idx_V_B[i]] = 1.0; 4563 } 4564 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4565 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4566 4567 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4568 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4569 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4570 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4571 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4572 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4573 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4574 } 4575 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4576 } 4577 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4578 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4579 } 4580 4581 /* free memory */ 4582 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4583 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4584 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4585 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4586 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4587 ierr = PetscFree(work);CHKERRQ(ierr); 4588 if (n_vertices) { 4589 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4590 } 4591 if (n_constraints) { 4592 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4593 } 4594 /* Checking coarse_sub_mat and coarse basis functios */ 4595 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4596 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4597 if (pcbddc->dbg_flag) { 4598 Mat coarse_sub_mat; 4599 Mat AUXMAT,TM1,TM2,TM3,TM4; 4600 Mat coarse_phi_D,coarse_phi_B; 4601 Mat coarse_psi_D,coarse_psi_B; 4602 Mat A_II,A_BB,A_IB,A_BI; 4603 Mat C_B,CPHI; 4604 IS is_dummy; 4605 Vec mones; 4606 MatType checkmattype=MATSEQAIJ; 4607 PetscReal real_value; 4608 4609 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4610 Mat A; 4611 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4612 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4613 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4614 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4615 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4616 ierr = MatDestroy(&A);CHKERRQ(ierr); 4617 } else { 4618 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4619 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4620 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4621 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4622 } 4623 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4624 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4625 if (!pcbddc->symmetric_primal) { 4626 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4627 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4628 } 4629 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4630 4631 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4632 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4633 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4634 if (!pcbddc->symmetric_primal) { 4635 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4636 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4637 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4638 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4639 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4640 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4641 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4642 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4643 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4644 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4645 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4646 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4647 } else { 4648 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4649 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4650 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4651 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4652 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4653 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4654 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4655 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4656 } 4657 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4658 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4659 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4660 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4661 if (pcbddc->benign_n) { 4662 Mat B0_B,B0_BPHI; 4663 PetscScalar *data,*data2; 4664 PetscInt j; 4665 4666 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4667 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4668 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4669 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4670 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4671 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4672 for (j=0;j<pcbddc->benign_n;j++) { 4673 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4674 for (i=0;i<pcbddc->local_primal_size;i++) { 4675 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4676 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4677 } 4678 } 4679 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4680 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4681 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4682 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4683 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4684 } 4685 #if 0 4686 { 4687 PetscViewer viewer; 4688 char filename[256]; 4689 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4690 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4691 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4692 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4693 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4694 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4695 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4696 if (pcbddc->coarse_phi_B) { 4697 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4698 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4699 } 4700 if (pcbddc->coarse_phi_D) { 4701 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4702 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4703 } 4704 if (pcbddc->coarse_psi_B) { 4705 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4706 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4707 } 4708 if (pcbddc->coarse_psi_D) { 4709 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4710 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4711 } 4712 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4713 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4714 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4715 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4716 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4717 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4718 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4719 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4720 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4721 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4722 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4723 } 4724 #endif 4725 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4726 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4727 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4728 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4729 4730 /* check constraints */ 4731 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4732 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4733 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4734 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4735 } else { 4736 PetscScalar *data; 4737 Mat tmat; 4738 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4739 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4740 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4741 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4742 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4743 } 4744 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4745 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4746 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4747 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4748 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4749 if (!pcbddc->symmetric_primal) { 4750 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4751 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4752 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4753 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4754 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4755 } 4756 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4757 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4758 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4759 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4760 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4761 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4762 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4763 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4764 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4765 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4766 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4767 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4768 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4769 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4770 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4771 if (!pcbddc->symmetric_primal) { 4772 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4773 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4774 } 4775 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4776 } 4777 /* get back data */ 4778 *coarse_submat_vals_n = coarse_submat_vals; 4779 PetscFunctionReturn(0); 4780 } 4781 4782 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4783 { 4784 Mat *work_mat; 4785 IS isrow_s,iscol_s; 4786 PetscBool rsorted,csorted; 4787 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4788 PetscErrorCode ierr; 4789 4790 PetscFunctionBegin; 4791 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4792 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4793 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4794 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4795 4796 if (!rsorted) { 4797 const PetscInt *idxs; 4798 PetscInt *idxs_sorted,i; 4799 4800 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4801 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4802 for (i=0;i<rsize;i++) { 4803 idxs_perm_r[i] = i; 4804 } 4805 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4806 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4807 for (i=0;i<rsize;i++) { 4808 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4809 } 4810 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4811 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4812 } else { 4813 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4814 isrow_s = isrow; 4815 } 4816 4817 if (!csorted) { 4818 if (isrow == iscol) { 4819 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4820 iscol_s = isrow_s; 4821 } else { 4822 const PetscInt *idxs; 4823 PetscInt *idxs_sorted,i; 4824 4825 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4826 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4827 for (i=0;i<csize;i++) { 4828 idxs_perm_c[i] = i; 4829 } 4830 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4831 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4832 for (i=0;i<csize;i++) { 4833 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4834 } 4835 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4836 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4837 } 4838 } else { 4839 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4840 iscol_s = iscol; 4841 } 4842 4843 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4844 4845 if (!rsorted || !csorted) { 4846 Mat new_mat; 4847 IS is_perm_r,is_perm_c; 4848 4849 if (!rsorted) { 4850 PetscInt *idxs_r,i; 4851 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4852 for (i=0;i<rsize;i++) { 4853 idxs_r[idxs_perm_r[i]] = i; 4854 } 4855 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4856 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4857 } else { 4858 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4859 } 4860 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4861 4862 if (!csorted) { 4863 if (isrow_s == iscol_s) { 4864 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4865 is_perm_c = is_perm_r; 4866 } else { 4867 PetscInt *idxs_c,i; 4868 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4869 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4870 for (i=0;i<csize;i++) { 4871 idxs_c[idxs_perm_c[i]] = i; 4872 } 4873 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4874 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4875 } 4876 } else { 4877 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4878 } 4879 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4880 4881 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4882 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4883 work_mat[0] = new_mat; 4884 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4885 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4886 } 4887 4888 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4889 *B = work_mat[0]; 4890 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4891 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4892 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4893 PetscFunctionReturn(0); 4894 } 4895 4896 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4897 { 4898 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4899 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4900 Mat new_mat,lA; 4901 IS is_local,is_global; 4902 PetscInt local_size; 4903 PetscBool isseqaij; 4904 PetscErrorCode ierr; 4905 4906 PetscFunctionBegin; 4907 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4908 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4909 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4910 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4911 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4912 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4913 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4914 4915 /* check */ 4916 if (pcbddc->dbg_flag) { 4917 Vec x,x_change; 4918 PetscReal error; 4919 4920 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4921 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4922 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4923 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4924 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4925 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4926 if (!pcbddc->change_interior) { 4927 const PetscScalar *x,*y,*v; 4928 PetscReal lerror = 0.; 4929 PetscInt i; 4930 4931 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4932 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4933 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4934 for (i=0;i<local_size;i++) 4935 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4936 lerror = PetscAbsScalar(x[i]-y[i]); 4937 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4938 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4939 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4940 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4941 if (error > PETSC_SMALL) { 4942 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4943 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4944 } else { 4945 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4946 } 4947 } 4948 } 4949 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4950 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4951 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4952 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4953 if (error > PETSC_SMALL) { 4954 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4955 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4956 } else { 4957 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4958 } 4959 } 4960 ierr = VecDestroy(&x);CHKERRQ(ierr); 4961 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4962 } 4963 4964 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4965 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4966 4967 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4968 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4969 if (isseqaij) { 4970 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4971 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4972 if (lA) { 4973 Mat work; 4974 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4975 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4976 ierr = MatDestroy(&work);CHKERRQ(ierr); 4977 } 4978 } else { 4979 Mat work_mat; 4980 4981 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4982 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4983 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4984 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4985 if (lA) { 4986 Mat work; 4987 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4988 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4989 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4990 ierr = MatDestroy(&work);CHKERRQ(ierr); 4991 } 4992 } 4993 if (matis->A->symmetric_set) { 4994 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4995 #if !defined(PETSC_USE_COMPLEX) 4996 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4997 #endif 4998 } 4999 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 5000 PetscFunctionReturn(0); 5001 } 5002 5003 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5004 { 5005 PC_IS* pcis = (PC_IS*)(pc->data); 5006 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5007 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5008 PetscInt *idx_R_local=NULL; 5009 PetscInt n_vertices,i,j,n_R,n_D,n_B; 5010 PetscInt vbs,bs; 5011 PetscBT bitmask=NULL; 5012 PetscErrorCode ierr; 5013 5014 PetscFunctionBegin; 5015 /* 5016 No need to setup local scatters if 5017 - primal space is unchanged 5018 AND 5019 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5020 AND 5021 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5022 */ 5023 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5024 PetscFunctionReturn(0); 5025 } 5026 /* destroy old objects */ 5027 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 5028 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 5029 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 5030 /* Set Non-overlapping dimensions */ 5031 n_B = pcis->n_B; 5032 n_D = pcis->n - n_B; 5033 n_vertices = pcbddc->n_vertices; 5034 5035 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5036 5037 /* create auxiliary bitmask and allocate workspace */ 5038 if (!sub_schurs || !sub_schurs->reuse_solver) { 5039 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 5040 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 5041 for (i=0;i<n_vertices;i++) { 5042 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 5043 } 5044 5045 for (i=0, n_R=0; i<pcis->n; i++) { 5046 if (!PetscBTLookup(bitmask,i)) { 5047 idx_R_local[n_R++] = i; 5048 } 5049 } 5050 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5051 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5052 5053 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5054 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 5055 } 5056 5057 /* Block code */ 5058 vbs = 1; 5059 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 5060 if (bs>1 && !(n_vertices%bs)) { 5061 PetscBool is_blocked = PETSC_TRUE; 5062 PetscInt *vary; 5063 if (!sub_schurs || !sub_schurs->reuse_solver) { 5064 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 5065 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 5066 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5067 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5068 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5069 for (i=0; i<pcis->n/bs; i++) { 5070 if (vary[i]!=0 && vary[i]!=bs) { 5071 is_blocked = PETSC_FALSE; 5072 break; 5073 } 5074 } 5075 ierr = PetscFree(vary);CHKERRQ(ierr); 5076 } else { 5077 /* Verify directly the R set */ 5078 for (i=0; i<n_R/bs; i++) { 5079 PetscInt j,node=idx_R_local[bs*i]; 5080 for (j=1; j<bs; j++) { 5081 if (node != idx_R_local[bs*i+j]-j) { 5082 is_blocked = PETSC_FALSE; 5083 break; 5084 } 5085 } 5086 } 5087 } 5088 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5089 vbs = bs; 5090 for (i=0;i<n_R/vbs;i++) { 5091 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5092 } 5093 } 5094 } 5095 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 5096 if (sub_schurs && sub_schurs->reuse_solver) { 5097 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5098 5099 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5100 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 5101 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 5102 reuse_solver->is_R = pcbddc->is_R_local; 5103 } else { 5104 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 5105 } 5106 5107 /* print some info if requested */ 5108 if (pcbddc->dbg_flag) { 5109 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5110 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5111 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5112 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 5113 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 5114 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 5115 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5116 } 5117 5118 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5119 if (!sub_schurs || !sub_schurs->reuse_solver) { 5120 IS is_aux1,is_aux2; 5121 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5122 5123 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5124 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 5125 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 5126 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5127 for (i=0; i<n_D; i++) { 5128 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 5129 } 5130 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5131 for (i=0, j=0; i<n_R; i++) { 5132 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5133 aux_array1[j++] = i; 5134 } 5135 } 5136 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5137 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5138 for (i=0, j=0; i<n_B; i++) { 5139 if (!PetscBTLookup(bitmask,is_indices[i])) { 5140 aux_array2[j++] = i; 5141 } 5142 } 5143 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5144 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 5145 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 5146 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5147 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 5148 5149 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5150 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 5151 for (i=0, j=0; i<n_R; i++) { 5152 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5153 aux_array1[j++] = i; 5154 } 5155 } 5156 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5157 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5158 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5159 } 5160 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 5161 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5162 } else { 5163 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5164 IS tis; 5165 PetscInt schur_size; 5166 5167 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 5168 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 5169 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 5170 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5171 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5172 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 5173 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5174 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5175 } 5176 } 5177 PetscFunctionReturn(0); 5178 } 5179 5180 5181 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5182 { 5183 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5184 PC_IS *pcis = (PC_IS*)pc->data; 5185 PC pc_temp; 5186 Mat A_RR; 5187 MatReuse reuse; 5188 PetscScalar m_one = -1.0; 5189 PetscReal value; 5190 PetscInt n_D,n_R; 5191 PetscBool check_corr,issbaij; 5192 PetscErrorCode ierr; 5193 /* prefixes stuff */ 5194 char dir_prefix[256],neu_prefix[256],str_level[16]; 5195 size_t len; 5196 5197 PetscFunctionBegin; 5198 5199 /* compute prefixes */ 5200 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 5201 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 5202 if (!pcbddc->current_level) { 5203 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));CHKERRQ(ierr); 5204 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));CHKERRQ(ierr); 5205 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5206 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5207 } else { 5208 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 5209 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5210 len -= 15; /* remove "pc_bddc_coarse_" */ 5211 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5212 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5213 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5214 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5215 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5216 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5217 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5218 ierr = PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));CHKERRQ(ierr); 5219 ierr = PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));CHKERRQ(ierr); 5220 } 5221 5222 /* DIRICHLET PROBLEM */ 5223 if (dirichlet) { 5224 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5225 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5226 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 5227 if (pcbddc->dbg_flag) { 5228 Mat A_IIn; 5229 5230 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5231 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5232 pcis->A_II = A_IIn; 5233 } 5234 } 5235 if (pcbddc->local_mat->symmetric_set) { 5236 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5237 } 5238 /* Matrix for Dirichlet problem is pcis->A_II */ 5239 n_D = pcis->n - pcis->n_B; 5240 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5241 void (*f)(void) = 0; 5242 5243 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5244 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5245 /* default */ 5246 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5247 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5248 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5249 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5250 if (issbaij) { 5251 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5252 } else { 5253 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5254 } 5255 /* Allow user's customization */ 5256 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5257 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5258 if (f && pcbddc->mat_graph->cloc) { 5259 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5260 const PetscInt *idxs; 5261 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5262 5263 ierr = ISGetLocalSize(pcis->is_I_local,&nl);CHKERRQ(ierr); 5264 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5265 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5266 for (i=0;i<nl;i++) { 5267 for (d=0;d<cdim;d++) { 5268 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5269 } 5270 } 5271 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5272 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5273 ierr = PetscFree(scoords);CHKERRQ(ierr); 5274 } 5275 } 5276 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 5277 if (sub_schurs && sub_schurs->reuse_solver) { 5278 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5279 5280 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5281 } 5282 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5283 if (!n_D) { 5284 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5285 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5286 } 5287 /* set ksp_D into pcis data */ 5288 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5289 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5290 pcis->ksp_D = pcbddc->ksp_D; 5291 } 5292 5293 /* NEUMANN PROBLEM */ 5294 A_RR = 0; 5295 if (neumann) { 5296 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5297 PetscInt ibs,mbs; 5298 PetscBool issbaij, reuse_neumann_solver; 5299 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5300 5301 reuse_neumann_solver = PETSC_FALSE; 5302 if (sub_schurs && sub_schurs->reuse_solver) { 5303 IS iP; 5304 5305 reuse_neumann_solver = PETSC_TRUE; 5306 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5307 if (iP) reuse_neumann_solver = PETSC_FALSE; 5308 } 5309 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5310 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5311 if (pcbddc->ksp_R) { /* already created ksp */ 5312 PetscInt nn_R; 5313 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5314 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5315 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5316 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5317 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5318 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5319 reuse = MAT_INITIAL_MATRIX; 5320 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5321 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5322 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5323 reuse = MAT_INITIAL_MATRIX; 5324 } else { /* safe to reuse the matrix */ 5325 reuse = MAT_REUSE_MATRIX; 5326 } 5327 } 5328 /* last check */ 5329 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5330 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5331 reuse = MAT_INITIAL_MATRIX; 5332 } 5333 } else { /* first time, so we need to create the matrix */ 5334 reuse = MAT_INITIAL_MATRIX; 5335 } 5336 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 5337 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5338 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5339 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5340 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5341 if (matis->A == pcbddc->local_mat) { 5342 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5343 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5344 } else { 5345 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5346 } 5347 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5348 if (matis->A == pcbddc->local_mat) { 5349 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5350 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5351 } else { 5352 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5353 } 5354 } 5355 /* extract A_RR */ 5356 if (reuse_neumann_solver) { 5357 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5358 5359 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5360 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5361 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5362 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5363 } else { 5364 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5365 } 5366 } else { 5367 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5368 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5369 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5370 } 5371 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5372 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5373 } 5374 if (pcbddc->local_mat->symmetric_set) { 5375 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5376 } 5377 if (!pcbddc->ksp_R) { /* create object if not present */ 5378 void (*f)(void) = 0; 5379 5380 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5381 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5382 /* default */ 5383 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5384 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5385 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5386 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5387 if (issbaij) { 5388 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5389 } else { 5390 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5391 } 5392 /* Allow user's customization */ 5393 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5394 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5395 if (f && pcbddc->mat_graph->cloc) { 5396 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5397 const PetscInt *idxs; 5398 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5399 5400 ierr = ISGetLocalSize(pcbddc->is_R_local,&nl);CHKERRQ(ierr); 5401 ierr = ISGetIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5402 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5403 for (i=0;i<nl;i++) { 5404 for (d=0;d<cdim;d++) { 5405 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5406 } 5407 } 5408 ierr = ISRestoreIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5409 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5410 ierr = PetscFree(scoords);CHKERRQ(ierr); 5411 } 5412 } 5413 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5414 if (!n_R) { 5415 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5416 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5417 } 5418 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5419 /* Reuse solver if it is present */ 5420 if (reuse_neumann_solver) { 5421 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5422 5423 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5424 } 5425 } 5426 5427 if (pcbddc->dbg_flag) { 5428 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5429 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5430 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5431 } 5432 5433 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5434 check_corr = PETSC_FALSE; 5435 if (pcbddc->NullSpace_corr[0]) { 5436 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5437 } 5438 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5439 check_corr = PETSC_TRUE; 5440 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5441 } 5442 if (neumann && pcbddc->NullSpace_corr[2]) { 5443 check_corr = PETSC_TRUE; 5444 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5445 } 5446 /* check Dirichlet and Neumann solvers */ 5447 if (pcbddc->dbg_flag) { 5448 if (dirichlet) { /* Dirichlet */ 5449 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5450 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5451 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5452 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5453 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5454 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5455 if (check_corr) { 5456 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 5457 } 5458 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5459 } 5460 if (neumann) { /* Neumann */ 5461 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5462 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5463 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5464 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5465 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5466 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5467 if (check_corr) { 5468 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 5469 } 5470 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5471 } 5472 } 5473 /* free Neumann problem's matrix */ 5474 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5475 PetscFunctionReturn(0); 5476 } 5477 5478 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5479 { 5480 PetscErrorCode ierr; 5481 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5482 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5483 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5484 5485 PetscFunctionBegin; 5486 if (!reuse_solver) { 5487 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5488 } 5489 if (!pcbddc->switch_static) { 5490 if (applytranspose && pcbddc->local_auxmat1) { 5491 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5492 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5493 } 5494 if (!reuse_solver) { 5495 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5496 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5497 } else { 5498 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5499 5500 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5501 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5502 } 5503 } else { 5504 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5505 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5506 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5507 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5508 if (applytranspose && pcbddc->local_auxmat1) { 5509 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5510 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5511 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5512 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5513 } 5514 } 5515 if (!reuse_solver || pcbddc->switch_static) { 5516 if (applytranspose) { 5517 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5518 } else { 5519 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5520 } 5521 } else { 5522 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5523 5524 if (applytranspose) { 5525 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5526 } else { 5527 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5528 } 5529 } 5530 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5531 if (!pcbddc->switch_static) { 5532 if (!reuse_solver) { 5533 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5534 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5535 } else { 5536 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5537 5538 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5539 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5540 } 5541 if (!applytranspose && pcbddc->local_auxmat1) { 5542 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5543 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5544 } 5545 } else { 5546 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5547 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5548 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5549 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5550 if (!applytranspose && pcbddc->local_auxmat1) { 5551 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5552 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5553 } 5554 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5555 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5556 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5557 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5558 } 5559 PetscFunctionReturn(0); 5560 } 5561 5562 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5563 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5564 { 5565 PetscErrorCode ierr; 5566 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5567 PC_IS* pcis = (PC_IS*) (pc->data); 5568 const PetscScalar zero = 0.0; 5569 5570 PetscFunctionBegin; 5571 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5572 if (!pcbddc->benign_apply_coarse_only) { 5573 if (applytranspose) { 5574 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5575 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5576 } else { 5577 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5578 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5579 } 5580 } else { 5581 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5582 } 5583 5584 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5585 if (pcbddc->benign_n) { 5586 PetscScalar *array; 5587 PetscInt j; 5588 5589 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5590 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5591 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5592 } 5593 5594 /* start communications from local primal nodes to rhs of coarse solver */ 5595 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5596 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5597 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5598 5599 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5600 if (pcbddc->coarse_ksp) { 5601 Mat coarse_mat; 5602 Vec rhs,sol; 5603 MatNullSpace nullsp; 5604 PetscBool isbddc = PETSC_FALSE; 5605 5606 if (pcbddc->benign_have_null) { 5607 PC coarse_pc; 5608 5609 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5610 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5611 /* we need to propagate to coarser levels the need for a possible benign correction */ 5612 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5613 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5614 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5615 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5616 } 5617 } 5618 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5619 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5620 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5621 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5622 if (nullsp) { 5623 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5624 } 5625 if (applytranspose) { 5626 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5627 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5628 } else { 5629 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5630 PC coarse_pc; 5631 5632 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5633 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5634 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5635 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5636 } else { 5637 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5638 } 5639 } 5640 /* we don't need the benign correction at coarser levels anymore */ 5641 if (pcbddc->benign_have_null && isbddc) { 5642 PC coarse_pc; 5643 PC_BDDC* coarsepcbddc; 5644 5645 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5646 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5647 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5648 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5649 } 5650 if (nullsp) { 5651 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5652 } 5653 } 5654 5655 /* Local solution on R nodes */ 5656 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5657 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5658 } 5659 /* communications from coarse sol to local primal nodes */ 5660 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5661 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5662 5663 /* Sum contributions from the two levels */ 5664 if (!pcbddc->benign_apply_coarse_only) { 5665 if (applytranspose) { 5666 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5667 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5668 } else { 5669 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5670 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5671 } 5672 /* store p0 */ 5673 if (pcbddc->benign_n) { 5674 PetscScalar *array; 5675 PetscInt j; 5676 5677 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5678 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5679 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5680 } 5681 } else { /* expand the coarse solution */ 5682 if (applytranspose) { 5683 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5684 } else { 5685 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5686 } 5687 } 5688 PetscFunctionReturn(0); 5689 } 5690 5691 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5692 { 5693 PetscErrorCode ierr; 5694 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5695 PetscScalar *array; 5696 Vec from,to; 5697 5698 PetscFunctionBegin; 5699 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5700 from = pcbddc->coarse_vec; 5701 to = pcbddc->vec1_P; 5702 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5703 Vec tvec; 5704 5705 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5706 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5707 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5708 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5709 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5710 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5711 } 5712 } else { /* from local to global -> put data in coarse right hand side */ 5713 from = pcbddc->vec1_P; 5714 to = pcbddc->coarse_vec; 5715 } 5716 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5717 PetscFunctionReturn(0); 5718 } 5719 5720 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5721 { 5722 PetscErrorCode ierr; 5723 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5724 PetscScalar *array; 5725 Vec from,to; 5726 5727 PetscFunctionBegin; 5728 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5729 from = pcbddc->coarse_vec; 5730 to = pcbddc->vec1_P; 5731 } else { /* from local to global -> put data in coarse right hand side */ 5732 from = pcbddc->vec1_P; 5733 to = pcbddc->coarse_vec; 5734 } 5735 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5736 if (smode == SCATTER_FORWARD) { 5737 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5738 Vec tvec; 5739 5740 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5741 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5742 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5743 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5744 } 5745 } else { 5746 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5747 ierr = VecResetArray(from);CHKERRQ(ierr); 5748 } 5749 } 5750 PetscFunctionReturn(0); 5751 } 5752 5753 /* uncomment for testing purposes */ 5754 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5755 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5756 { 5757 PetscErrorCode ierr; 5758 PC_IS* pcis = (PC_IS*)(pc->data); 5759 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5760 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5761 /* one and zero */ 5762 PetscScalar one=1.0,zero=0.0; 5763 /* space to store constraints and their local indices */ 5764 PetscScalar *constraints_data; 5765 PetscInt *constraints_idxs,*constraints_idxs_B; 5766 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5767 PetscInt *constraints_n; 5768 /* iterators */ 5769 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5770 /* BLAS integers */ 5771 PetscBLASInt lwork,lierr; 5772 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5773 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5774 /* reuse */ 5775 PetscInt olocal_primal_size,olocal_primal_size_cc; 5776 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5777 /* change of basis */ 5778 PetscBool qr_needed; 5779 PetscBT change_basis,qr_needed_idx; 5780 /* auxiliary stuff */ 5781 PetscInt *nnz,*is_indices; 5782 PetscInt ncc; 5783 /* some quantities */ 5784 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5785 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5786 PetscReal tol; /* tolerance for retaining eigenmodes */ 5787 5788 PetscFunctionBegin; 5789 tol = PetscSqrtReal(PETSC_SMALL); 5790 /* Destroy Mat objects computed previously */ 5791 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5792 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5793 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5794 /* save info on constraints from previous setup (if any) */ 5795 olocal_primal_size = pcbddc->local_primal_size; 5796 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5797 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5798 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5799 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5800 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5801 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5802 5803 if (!pcbddc->adaptive_selection) { 5804 IS ISForVertices,*ISForFaces,*ISForEdges; 5805 MatNullSpace nearnullsp; 5806 const Vec *nearnullvecs; 5807 Vec *localnearnullsp; 5808 PetscScalar *array; 5809 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5810 PetscBool nnsp_has_cnst; 5811 /* LAPACK working arrays for SVD or POD */ 5812 PetscBool skip_lapack,boolforchange; 5813 PetscScalar *work; 5814 PetscReal *singular_vals; 5815 #if defined(PETSC_USE_COMPLEX) 5816 PetscReal *rwork; 5817 #endif 5818 #if defined(PETSC_MISSING_LAPACK_GESVD) 5819 PetscScalar *temp_basis,*correlation_mat; 5820 #else 5821 PetscBLASInt dummy_int=1; 5822 PetscScalar dummy_scalar=1.; 5823 #endif 5824 5825 /* Get index sets for faces, edges and vertices from graph */ 5826 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5827 /* print some info */ 5828 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5829 PetscInt nv; 5830 5831 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5832 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5833 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5834 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5835 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5836 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5837 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5838 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5839 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5840 } 5841 5842 /* free unneeded index sets */ 5843 if (!pcbddc->use_vertices) { 5844 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5845 } 5846 if (!pcbddc->use_edges) { 5847 for (i=0;i<n_ISForEdges;i++) { 5848 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5849 } 5850 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5851 n_ISForEdges = 0; 5852 } 5853 if (!pcbddc->use_faces) { 5854 for (i=0;i<n_ISForFaces;i++) { 5855 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5856 } 5857 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5858 n_ISForFaces = 0; 5859 } 5860 5861 /* check if near null space is attached to global mat */ 5862 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5863 if (nearnullsp) { 5864 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5865 /* remove any stored info */ 5866 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5867 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5868 /* store information for BDDC solver reuse */ 5869 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5870 pcbddc->onearnullspace = nearnullsp; 5871 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5872 for (i=0;i<nnsp_size;i++) { 5873 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5874 } 5875 } else { /* if near null space is not provided BDDC uses constants by default */ 5876 nnsp_size = 0; 5877 nnsp_has_cnst = PETSC_TRUE; 5878 } 5879 /* get max number of constraints on a single cc */ 5880 max_constraints = nnsp_size; 5881 if (nnsp_has_cnst) max_constraints++; 5882 5883 /* 5884 Evaluate maximum storage size needed by the procedure 5885 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5886 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5887 There can be multiple constraints per connected component 5888 */ 5889 n_vertices = 0; 5890 if (ISForVertices) { 5891 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5892 } 5893 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5894 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5895 5896 total_counts = n_ISForFaces+n_ISForEdges; 5897 total_counts *= max_constraints; 5898 total_counts += n_vertices; 5899 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5900 5901 total_counts = 0; 5902 max_size_of_constraint = 0; 5903 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5904 IS used_is; 5905 if (i<n_ISForEdges) { 5906 used_is = ISForEdges[i]; 5907 } else { 5908 used_is = ISForFaces[i-n_ISForEdges]; 5909 } 5910 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5911 total_counts += j; 5912 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5913 } 5914 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5915 5916 /* get local part of global near null space vectors */ 5917 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5918 for (k=0;k<nnsp_size;k++) { 5919 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5920 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5921 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5922 } 5923 5924 /* whether or not to skip lapack calls */ 5925 skip_lapack = PETSC_TRUE; 5926 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5927 5928 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5929 if (!skip_lapack) { 5930 PetscScalar temp_work; 5931 5932 #if defined(PETSC_MISSING_LAPACK_GESVD) 5933 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5934 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5935 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5936 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5937 #if defined(PETSC_USE_COMPLEX) 5938 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5939 #endif 5940 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5941 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5942 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5943 lwork = -1; 5944 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5945 #if !defined(PETSC_USE_COMPLEX) 5946 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5947 #else 5948 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5949 #endif 5950 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5951 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5952 #else /* on missing GESVD */ 5953 /* SVD */ 5954 PetscInt max_n,min_n; 5955 max_n = max_size_of_constraint; 5956 min_n = max_constraints; 5957 if (max_size_of_constraint < max_constraints) { 5958 min_n = max_size_of_constraint; 5959 max_n = max_constraints; 5960 } 5961 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5962 #if defined(PETSC_USE_COMPLEX) 5963 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5964 #endif 5965 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5966 lwork = -1; 5967 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5968 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5969 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5970 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5971 #if !defined(PETSC_USE_COMPLEX) 5972 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5973 #else 5974 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5975 #endif 5976 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5977 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5978 #endif /* on missing GESVD */ 5979 /* Allocate optimal workspace */ 5980 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5981 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5982 } 5983 /* Now we can loop on constraining sets */ 5984 total_counts = 0; 5985 constraints_idxs_ptr[0] = 0; 5986 constraints_data_ptr[0] = 0; 5987 /* vertices */ 5988 if (n_vertices) { 5989 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5990 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5991 for (i=0;i<n_vertices;i++) { 5992 constraints_n[total_counts] = 1; 5993 constraints_data[total_counts] = 1.0; 5994 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5995 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5996 total_counts++; 5997 } 5998 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5999 n_vertices = total_counts; 6000 } 6001 6002 /* edges and faces */ 6003 total_counts_cc = total_counts; 6004 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6005 IS used_is; 6006 PetscBool idxs_copied = PETSC_FALSE; 6007 6008 if (ncc<n_ISForEdges) { 6009 used_is = ISForEdges[ncc]; 6010 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6011 } else { 6012 used_is = ISForFaces[ncc-n_ISForEdges]; 6013 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6014 } 6015 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6016 6017 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 6018 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6019 /* change of basis should not be performed on local periodic nodes */ 6020 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6021 if (nnsp_has_cnst) { 6022 PetscScalar quad_value; 6023 6024 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 6025 idxs_copied = PETSC_TRUE; 6026 6027 if (!pcbddc->use_nnsp_true) { 6028 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6029 } else { 6030 quad_value = 1.0; 6031 } 6032 for (j=0;j<size_of_constraint;j++) { 6033 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6034 } 6035 temp_constraints++; 6036 total_counts++; 6037 } 6038 for (k=0;k<nnsp_size;k++) { 6039 PetscReal real_value; 6040 PetscScalar *ptr_to_data; 6041 6042 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6043 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6044 for (j=0;j<size_of_constraint;j++) { 6045 ptr_to_data[j] = array[is_indices[j]]; 6046 } 6047 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6048 /* check if array is null on the connected component */ 6049 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6050 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6051 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6052 temp_constraints++; 6053 total_counts++; 6054 if (!idxs_copied) { 6055 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 6056 idxs_copied = PETSC_TRUE; 6057 } 6058 } 6059 } 6060 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6061 valid_constraints = temp_constraints; 6062 if (!pcbddc->use_nnsp_true && temp_constraints) { 6063 if (temp_constraints == 1) { /* just normalize the constraint */ 6064 PetscScalar norm,*ptr_to_data; 6065 6066 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6067 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6068 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6069 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6070 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6071 } else { /* perform SVD */ 6072 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6073 6074 #if defined(PETSC_MISSING_LAPACK_GESVD) 6075 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6076 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6077 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6078 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6079 from that computed using LAPACKgesvd 6080 -> This is due to a different computation of eigenvectors in LAPACKheev 6081 -> The quality of the POD-computed basis will be the same */ 6082 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 6083 /* Store upper triangular part of correlation matrix */ 6084 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6085 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6086 for (j=0;j<temp_constraints;j++) { 6087 for (k=0;k<j+1;k++) { 6088 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6089 } 6090 } 6091 /* compute eigenvalues and eigenvectors of correlation matrix */ 6092 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6093 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 6094 #if !defined(PETSC_USE_COMPLEX) 6095 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6096 #else 6097 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6098 #endif 6099 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6100 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6101 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6102 j = 0; 6103 while (j < temp_constraints && singular_vals[j] < tol) j++; 6104 total_counts = total_counts-j; 6105 valid_constraints = temp_constraints-j; 6106 /* scale and copy POD basis into used quadrature memory */ 6107 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6108 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6109 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 6110 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6111 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 6112 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6113 if (j<temp_constraints) { 6114 PetscInt ii; 6115 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6116 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6117 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6118 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6119 for (k=0;k<temp_constraints-j;k++) { 6120 for (ii=0;ii<size_of_constraint;ii++) { 6121 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6122 } 6123 } 6124 } 6125 #else /* on missing GESVD */ 6126 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6127 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6128 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6129 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6130 #if !defined(PETSC_USE_COMPLEX) 6131 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6132 #else 6133 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6134 #endif 6135 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6136 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6137 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6138 k = temp_constraints; 6139 if (k > size_of_constraint) k = size_of_constraint; 6140 j = 0; 6141 while (j < k && singular_vals[k-j-1] < tol) j++; 6142 valid_constraints = k-j; 6143 total_counts = total_counts-temp_constraints+valid_constraints; 6144 #endif /* on missing GESVD */ 6145 } 6146 } 6147 /* update pointers information */ 6148 if (valid_constraints) { 6149 constraints_n[total_counts_cc] = valid_constraints; 6150 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6151 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6152 /* set change_of_basis flag */ 6153 if (boolforchange) { 6154 PetscBTSet(change_basis,total_counts_cc); 6155 } 6156 total_counts_cc++; 6157 } 6158 } 6159 /* free workspace */ 6160 if (!skip_lapack) { 6161 ierr = PetscFree(work);CHKERRQ(ierr); 6162 #if defined(PETSC_USE_COMPLEX) 6163 ierr = PetscFree(rwork);CHKERRQ(ierr); 6164 #endif 6165 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 6166 #if defined(PETSC_MISSING_LAPACK_GESVD) 6167 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 6168 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 6169 #endif 6170 } 6171 for (k=0;k<nnsp_size;k++) { 6172 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 6173 } 6174 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 6175 /* free index sets of faces, edges and vertices */ 6176 for (i=0;i<n_ISForFaces;i++) { 6177 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6178 } 6179 if (n_ISForFaces) { 6180 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6181 } 6182 for (i=0;i<n_ISForEdges;i++) { 6183 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6184 } 6185 if (n_ISForEdges) { 6186 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6187 } 6188 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6189 } else { 6190 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6191 6192 total_counts = 0; 6193 n_vertices = 0; 6194 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6195 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6196 } 6197 max_constraints = 0; 6198 total_counts_cc = 0; 6199 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6200 total_counts += pcbddc->adaptive_constraints_n[i]; 6201 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6202 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6203 } 6204 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6205 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6206 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6207 constraints_data = pcbddc->adaptive_constraints_data; 6208 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6209 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 6210 total_counts_cc = 0; 6211 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6212 if (pcbddc->adaptive_constraints_n[i]) { 6213 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6214 } 6215 } 6216 #if 0 6217 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 6218 for (i=0;i<total_counts_cc;i++) { 6219 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 6220 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 6221 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 6222 printf(" %d",constraints_idxs[j]); 6223 } 6224 printf("\n"); 6225 printf("number of cc: %d\n",constraints_n[i]); 6226 } 6227 for (i=0;i<n_vertices;i++) { 6228 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 6229 } 6230 for (i=0;i<sub_schurs->n_subs;i++) { 6231 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 6232 } 6233 #endif 6234 6235 max_size_of_constraint = 0; 6236 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6237 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 6238 /* Change of basis */ 6239 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 6240 if (pcbddc->use_change_of_basis) { 6241 for (i=0;i<sub_schurs->n_subs;i++) { 6242 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6243 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 6244 } 6245 } 6246 } 6247 } 6248 pcbddc->local_primal_size = total_counts; 6249 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6250 6251 /* map constraints_idxs in boundary numbering */ 6252 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6253 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 6254 6255 /* Create constraint matrix */ 6256 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6257 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6258 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6259 6260 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6261 /* determine if a QR strategy is needed for change of basis */ 6262 qr_needed = PETSC_FALSE; 6263 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6264 total_primal_vertices=0; 6265 pcbddc->local_primal_size_cc = 0; 6266 for (i=0;i<total_counts_cc;i++) { 6267 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6268 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6269 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6270 pcbddc->local_primal_size_cc += 1; 6271 } else if (PetscBTLookup(change_basis,i)) { 6272 for (k=0;k<constraints_n[i];k++) { 6273 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6274 } 6275 pcbddc->local_primal_size_cc += constraints_n[i]; 6276 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6277 PetscBTSet(qr_needed_idx,i); 6278 qr_needed = PETSC_TRUE; 6279 } 6280 } else { 6281 pcbddc->local_primal_size_cc += 1; 6282 } 6283 } 6284 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6285 pcbddc->n_vertices = total_primal_vertices; 6286 /* permute indices in order to have a sorted set of vertices */ 6287 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6288 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6289 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 6290 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6291 6292 /* nonzero structure of constraint matrix */ 6293 /* and get reference dof for local constraints */ 6294 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6295 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6296 6297 j = total_primal_vertices; 6298 total_counts = total_primal_vertices; 6299 cum = total_primal_vertices; 6300 for (i=n_vertices;i<total_counts_cc;i++) { 6301 if (!PetscBTLookup(change_basis,i)) { 6302 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6303 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6304 cum++; 6305 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6306 for (k=0;k<constraints_n[i];k++) { 6307 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6308 nnz[j+k] = size_of_constraint; 6309 } 6310 j += constraints_n[i]; 6311 } 6312 } 6313 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6314 ierr = PetscFree(nnz);CHKERRQ(ierr); 6315 6316 /* set values in constraint matrix */ 6317 for (i=0;i<total_primal_vertices;i++) { 6318 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6319 } 6320 total_counts = total_primal_vertices; 6321 for (i=n_vertices;i<total_counts_cc;i++) { 6322 if (!PetscBTLookup(change_basis,i)) { 6323 PetscInt *cols; 6324 6325 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6326 cols = constraints_idxs+constraints_idxs_ptr[i]; 6327 for (k=0;k<constraints_n[i];k++) { 6328 PetscInt row = total_counts+k; 6329 PetscScalar *vals; 6330 6331 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6332 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6333 } 6334 total_counts += constraints_n[i]; 6335 } 6336 } 6337 /* assembling */ 6338 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6339 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6340 ierr = MatChop(pcbddc->ConstraintMatrix,PETSC_SMALL);CHKERRQ(ierr); 6341 ierr = MatSeqAIJCompress(pcbddc->ConstraintMatrix,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6342 ierr = MatViewFromOptions(pcbddc->ConstraintMatrix,NULL,"-pc_bddc_constraint_mat_view");CHKERRQ(ierr); 6343 6344 /* 6345 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6346 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 6347 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 6348 */ 6349 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6350 if (pcbddc->use_change_of_basis) { 6351 /* dual and primal dofs on a single cc */ 6352 PetscInt dual_dofs,primal_dofs; 6353 /* working stuff for GEQRF */ 6354 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 6355 PetscBLASInt lqr_work; 6356 /* working stuff for UNGQR */ 6357 PetscScalar *gqr_work,lgqr_work_t; 6358 PetscBLASInt lgqr_work; 6359 /* working stuff for TRTRS */ 6360 PetscScalar *trs_rhs; 6361 PetscBLASInt Blas_NRHS; 6362 /* pointers for values insertion into change of basis matrix */ 6363 PetscInt *start_rows,*start_cols; 6364 PetscScalar *start_vals; 6365 /* working stuff for values insertion */ 6366 PetscBT is_primal; 6367 PetscInt *aux_primal_numbering_B; 6368 /* matrix sizes */ 6369 PetscInt global_size,local_size; 6370 /* temporary change of basis */ 6371 Mat localChangeOfBasisMatrix; 6372 /* extra space for debugging */ 6373 PetscScalar *dbg_work; 6374 6375 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6376 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6377 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6378 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6379 /* nonzeros for local mat */ 6380 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6381 if (!pcbddc->benign_change || pcbddc->fake_change) { 6382 for (i=0;i<pcis->n;i++) nnz[i]=1; 6383 } else { 6384 const PetscInt *ii; 6385 PetscInt n; 6386 PetscBool flg_row; 6387 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6388 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6389 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6390 } 6391 for (i=n_vertices;i<total_counts_cc;i++) { 6392 if (PetscBTLookup(change_basis,i)) { 6393 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6394 if (PetscBTLookup(qr_needed_idx,i)) { 6395 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6396 } else { 6397 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6398 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6399 } 6400 } 6401 } 6402 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6403 ierr = PetscFree(nnz);CHKERRQ(ierr); 6404 /* Set interior change in the matrix */ 6405 if (!pcbddc->benign_change || pcbddc->fake_change) { 6406 for (i=0;i<pcis->n;i++) { 6407 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6408 } 6409 } else { 6410 const PetscInt *ii,*jj; 6411 PetscScalar *aa; 6412 PetscInt n; 6413 PetscBool flg_row; 6414 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6415 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6416 for (i=0;i<n;i++) { 6417 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6418 } 6419 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6420 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6421 } 6422 6423 if (pcbddc->dbg_flag) { 6424 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6425 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6426 } 6427 6428 6429 /* Now we loop on the constraints which need a change of basis */ 6430 /* 6431 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6432 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6433 6434 Basic blocks of change of basis matrix T computed by 6435 6436 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6437 6438 | 1 0 ... 0 s_1/S | 6439 | 0 1 ... 0 s_2/S | 6440 | ... | 6441 | 0 ... 1 s_{n-1}/S | 6442 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6443 6444 with S = \sum_{i=1}^n s_i^2 6445 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6446 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6447 6448 - QR decomposition of constraints otherwise 6449 */ 6450 if (qr_needed) { 6451 /* space to store Q */ 6452 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6453 /* array to store scaling factors for reflectors */ 6454 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6455 /* first we issue queries for optimal work */ 6456 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6457 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6458 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6459 lqr_work = -1; 6460 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6461 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6462 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6463 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6464 lgqr_work = -1; 6465 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6466 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6467 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6468 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6469 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6470 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6471 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6472 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6473 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6474 /* array to store rhs and solution of triangular solver */ 6475 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6476 /* allocating workspace for check */ 6477 if (pcbddc->dbg_flag) { 6478 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6479 } 6480 } 6481 /* array to store whether a node is primal or not */ 6482 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6483 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6484 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6485 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 6486 for (i=0;i<total_primal_vertices;i++) { 6487 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6488 } 6489 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6490 6491 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6492 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6493 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6494 if (PetscBTLookup(change_basis,total_counts)) { 6495 /* get constraint info */ 6496 primal_dofs = constraints_n[total_counts]; 6497 dual_dofs = size_of_constraint-primal_dofs; 6498 6499 if (pcbddc->dbg_flag) { 6500 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6501 } 6502 6503 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6504 6505 /* copy quadrature constraints for change of basis check */ 6506 if (pcbddc->dbg_flag) { 6507 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6508 } 6509 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6510 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6511 6512 /* compute QR decomposition of constraints */ 6513 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6514 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6515 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6516 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6517 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6518 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6519 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6520 6521 /* explictly compute R^-T */ 6522 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6523 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6524 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6525 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6526 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6527 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6528 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6529 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6530 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6531 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6532 6533 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6534 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6535 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6536 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6537 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6538 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6539 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6540 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6541 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6542 6543 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6544 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6545 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6546 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6547 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6548 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6549 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6550 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6551 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6552 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6553 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6554 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6555 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6556 6557 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6558 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6559 /* insert cols for primal dofs */ 6560 for (j=0;j<primal_dofs;j++) { 6561 start_vals = &qr_basis[j*size_of_constraint]; 6562 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6563 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6564 } 6565 /* insert cols for dual dofs */ 6566 for (j=0,k=0;j<dual_dofs;k++) { 6567 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6568 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6569 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6570 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6571 j++; 6572 } 6573 } 6574 6575 /* check change of basis */ 6576 if (pcbddc->dbg_flag) { 6577 PetscInt ii,jj; 6578 PetscBool valid_qr=PETSC_TRUE; 6579 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6580 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6581 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6582 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6583 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6584 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6585 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6586 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6587 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6588 for (jj=0;jj<size_of_constraint;jj++) { 6589 for (ii=0;ii<primal_dofs;ii++) { 6590 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6591 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6592 } 6593 } 6594 if (!valid_qr) { 6595 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6596 for (jj=0;jj<size_of_constraint;jj++) { 6597 for (ii=0;ii<primal_dofs;ii++) { 6598 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6599 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6600 } 6601 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6602 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6603 } 6604 } 6605 } 6606 } else { 6607 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6608 } 6609 } 6610 } else { /* simple transformation block */ 6611 PetscInt row,col; 6612 PetscScalar val,norm; 6613 6614 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6615 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6616 for (j=0;j<size_of_constraint;j++) { 6617 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6618 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6619 if (!PetscBTLookup(is_primal,row_B)) { 6620 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6621 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6622 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6623 } else { 6624 for (k=0;k<size_of_constraint;k++) { 6625 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6626 if (row != col) { 6627 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6628 } else { 6629 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6630 } 6631 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6632 } 6633 } 6634 } 6635 if (pcbddc->dbg_flag) { 6636 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6637 } 6638 } 6639 } else { 6640 if (pcbddc->dbg_flag) { 6641 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6642 } 6643 } 6644 } 6645 6646 /* free workspace */ 6647 if (qr_needed) { 6648 if (pcbddc->dbg_flag) { 6649 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6650 } 6651 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6652 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6653 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6654 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6655 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6656 } 6657 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6658 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6659 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6660 6661 /* assembling of global change of variable */ 6662 if (!pcbddc->fake_change) { 6663 Mat tmat; 6664 PetscInt bs; 6665 6666 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6667 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6668 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6669 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6670 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6671 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6672 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6673 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6674 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6675 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6676 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6677 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6678 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6679 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6680 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6681 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6682 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6683 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6684 6685 /* check */ 6686 if (pcbddc->dbg_flag) { 6687 PetscReal error; 6688 Vec x,x_change; 6689 6690 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6691 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6692 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6693 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6694 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6695 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6696 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6697 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6698 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6699 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6700 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6701 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6702 if (error > PETSC_SMALL) { 6703 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6704 } 6705 ierr = VecDestroy(&x);CHKERRQ(ierr); 6706 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6707 } 6708 /* adapt sub_schurs computed (if any) */ 6709 if (pcbddc->use_deluxe_scaling) { 6710 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6711 6712 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6713 if (sub_schurs && sub_schurs->S_Ej_all) { 6714 Mat S_new,tmat; 6715 IS is_all_N,is_V_Sall = NULL; 6716 6717 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6718 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6719 if (pcbddc->deluxe_zerorows) { 6720 ISLocalToGlobalMapping NtoSall; 6721 IS is_V; 6722 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6723 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6724 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6725 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6726 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6727 } 6728 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6729 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6730 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6731 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6732 if (pcbddc->deluxe_zerorows) { 6733 const PetscScalar *array; 6734 const PetscInt *idxs_V,*idxs_all; 6735 PetscInt i,n_V; 6736 6737 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6738 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6739 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6740 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6741 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6742 for (i=0;i<n_V;i++) { 6743 PetscScalar val; 6744 PetscInt idx; 6745 6746 idx = idxs_V[i]; 6747 val = array[idxs_all[idxs_V[i]]]; 6748 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6749 } 6750 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6751 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6752 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6753 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6754 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6755 } 6756 sub_schurs->S_Ej_all = S_new; 6757 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6758 if (sub_schurs->sum_S_Ej_all) { 6759 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6760 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6761 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6762 if (pcbddc->deluxe_zerorows) { 6763 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6764 } 6765 sub_schurs->sum_S_Ej_all = S_new; 6766 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6767 } 6768 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6769 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6770 } 6771 /* destroy any change of basis context in sub_schurs */ 6772 if (sub_schurs && sub_schurs->change) { 6773 PetscInt i; 6774 6775 for (i=0;i<sub_schurs->n_subs;i++) { 6776 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6777 } 6778 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6779 } 6780 } 6781 if (pcbddc->switch_static) { /* need to save the local change */ 6782 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6783 } else { 6784 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6785 } 6786 /* determine if any process has changed the pressures locally */ 6787 pcbddc->change_interior = pcbddc->benign_have_null; 6788 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6789 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6790 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6791 pcbddc->use_qr_single = qr_needed; 6792 } 6793 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6794 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6795 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6796 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6797 } else { 6798 Mat benign_global = NULL; 6799 if (pcbddc->benign_have_null) { 6800 Mat tmat; 6801 6802 pcbddc->change_interior = PETSC_TRUE; 6803 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6804 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6805 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6806 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6807 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6808 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6809 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6810 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6811 if (pcbddc->benign_change) { 6812 Mat M; 6813 6814 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6815 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6816 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6817 ierr = MatDestroy(&M);CHKERRQ(ierr); 6818 } else { 6819 Mat eye; 6820 PetscScalar *array; 6821 6822 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6823 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6824 for (i=0;i<pcis->n;i++) { 6825 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6826 } 6827 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6828 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6829 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6830 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6831 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6832 } 6833 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6834 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6835 } 6836 if (pcbddc->user_ChangeOfBasisMatrix) { 6837 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6838 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6839 } else if (pcbddc->benign_have_null) { 6840 pcbddc->ChangeOfBasisMatrix = benign_global; 6841 } 6842 } 6843 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6844 IS is_global; 6845 const PetscInt *gidxs; 6846 6847 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6848 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6849 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6850 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6851 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6852 } 6853 } 6854 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6855 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6856 } 6857 6858 if (!pcbddc->fake_change) { 6859 /* add pressure dofs to set of primal nodes for numbering purposes */ 6860 for (i=0;i<pcbddc->benign_n;i++) { 6861 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6862 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6863 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6864 pcbddc->local_primal_size_cc++; 6865 pcbddc->local_primal_size++; 6866 } 6867 6868 /* check if a new primal space has been introduced (also take into account benign trick) */ 6869 pcbddc->new_primal_space_local = PETSC_TRUE; 6870 if (olocal_primal_size == pcbddc->local_primal_size) { 6871 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6872 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6873 if (!pcbddc->new_primal_space_local) { 6874 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6875 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6876 } 6877 } 6878 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6879 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6880 } 6881 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6882 6883 /* flush dbg viewer */ 6884 if (pcbddc->dbg_flag) { 6885 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6886 } 6887 6888 /* free workspace */ 6889 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6890 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6891 if (!pcbddc->adaptive_selection) { 6892 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6893 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6894 } else { 6895 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6896 pcbddc->adaptive_constraints_idxs_ptr, 6897 pcbddc->adaptive_constraints_data_ptr, 6898 pcbddc->adaptive_constraints_idxs, 6899 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6900 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6901 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6902 } 6903 PetscFunctionReturn(0); 6904 } 6905 /* #undef PETSC_MISSING_LAPACK_GESVD */ 6906 6907 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6908 { 6909 ISLocalToGlobalMapping map; 6910 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6911 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6912 PetscInt i,N; 6913 PetscBool rcsr = PETSC_FALSE; 6914 PetscErrorCode ierr; 6915 6916 PetscFunctionBegin; 6917 if (pcbddc->recompute_topography) { 6918 pcbddc->graphanalyzed = PETSC_FALSE; 6919 /* Reset previously computed graph */ 6920 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6921 /* Init local Graph struct */ 6922 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6923 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6924 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6925 6926 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6927 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6928 } 6929 /* Check validity of the csr graph passed in by the user */ 6930 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6931 6932 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6933 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6934 PetscInt *xadj,*adjncy; 6935 PetscInt nvtxs; 6936 PetscBool flg_row=PETSC_FALSE; 6937 6938 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6939 if (flg_row) { 6940 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6941 pcbddc->computed_rowadj = PETSC_TRUE; 6942 } 6943 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6944 rcsr = PETSC_TRUE; 6945 } 6946 if (pcbddc->dbg_flag) { 6947 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6948 } 6949 6950 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 6951 PetscReal *lcoords; 6952 PetscInt n; 6953 MPI_Datatype dimrealtype; 6954 6955 if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 6956 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 6957 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 6958 ierr = PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);CHKERRQ(ierr); 6959 ierr = MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);CHKERRQ(ierr); 6960 ierr = MPI_Type_commit(&dimrealtype);CHKERRQ(ierr); 6961 ierr = PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 6962 ierr = PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 6963 ierr = MPI_Type_free(&dimrealtype);CHKERRQ(ierr); 6964 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 6965 6966 pcbddc->mat_graph->coords = lcoords; 6967 pcbddc->mat_graph->cloc = PETSC_TRUE; 6968 pcbddc->mat_graph->cnloc = n; 6969 } 6970 if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 6971 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected); 6972 6973 /* Setup of Graph */ 6974 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6975 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6976 6977 /* attach info on disconnected subdomains if present */ 6978 if (pcbddc->n_local_subs) { 6979 PetscInt *local_subs; 6980 6981 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6982 for (i=0;i<pcbddc->n_local_subs;i++) { 6983 const PetscInt *idxs; 6984 PetscInt nl,j; 6985 6986 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6987 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6988 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6989 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6990 } 6991 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6992 pcbddc->mat_graph->local_subs = local_subs; 6993 } 6994 } 6995 6996 if (!pcbddc->graphanalyzed) { 6997 /* Graph's connected components analysis */ 6998 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6999 pcbddc->graphanalyzed = PETSC_TRUE; 7000 } 7001 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7002 PetscFunctionReturn(0); 7003 } 7004 7005 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 7006 { 7007 PetscInt i,j; 7008 PetscScalar *alphas; 7009 PetscErrorCode ierr; 7010 7011 PetscFunctionBegin; 7012 if (!n) PetscFunctionReturn(0); 7013 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 7014 ierr = VecNormalize(vecs[0],NULL);CHKERRQ(ierr); 7015 for (i=1;i<n;i++) { 7016 ierr = VecMDot(vecs[i],i,vecs,alphas);CHKERRQ(ierr); 7017 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 7018 ierr = VecMAXPY(vecs[i],i,alphas,vecs);CHKERRQ(ierr); 7019 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 7020 } 7021 ierr = PetscFree(alphas);CHKERRQ(ierr); 7022 PetscFunctionReturn(0); 7023 } 7024 7025 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7026 { 7027 Mat A; 7028 PetscInt n_neighs,*neighs,*n_shared,**shared; 7029 PetscMPIInt size,rank,color; 7030 PetscInt *xadj,*adjncy; 7031 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7032 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7033 PetscInt void_procs,*procs_candidates = NULL; 7034 PetscInt xadj_count,*count; 7035 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7036 PetscSubcomm psubcomm; 7037 MPI_Comm subcomm; 7038 PetscErrorCode ierr; 7039 7040 PetscFunctionBegin; 7041 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7042 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7043 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7044 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7045 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7046 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 7047 7048 if (have_void) *have_void = PETSC_FALSE; 7049 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 7050 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 7051 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 7052 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 7053 im_active = !!n; 7054 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7055 void_procs = size - active_procs; 7056 /* get ranks of of non-active processes in mat communicator */ 7057 if (void_procs) { 7058 PetscInt ncand; 7059 7060 if (have_void) *have_void = PETSC_TRUE; 7061 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 7062 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7063 for (i=0,ncand=0;i<size;i++) { 7064 if (!procs_candidates[i]) { 7065 procs_candidates[ncand++] = i; 7066 } 7067 } 7068 /* force n_subdomains to be not greater that the number of non-active processes */ 7069 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7070 } 7071 7072 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7073 number of subdomains requested 1 -> send to master or first candidate in voids */ 7074 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 7075 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7076 PetscInt issize,isidx,dest; 7077 if (*n_subdomains == 1) dest = 0; 7078 else dest = rank; 7079 if (im_active) { 7080 issize = 1; 7081 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7082 isidx = procs_candidates[dest]; 7083 } else { 7084 isidx = dest; 7085 } 7086 } else { 7087 issize = 0; 7088 isidx = -1; 7089 } 7090 if (*n_subdomains != 1) *n_subdomains = active_procs; 7091 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 7092 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7093 PetscFunctionReturn(0); 7094 } 7095 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 7096 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 7097 threshold = PetscMax(threshold,2); 7098 7099 /* Get info on mapping */ 7100 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7101 7102 /* build local CSR graph of subdomains' connectivity */ 7103 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 7104 xadj[0] = 0; 7105 xadj[1] = PetscMax(n_neighs-1,0); 7106 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 7107 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 7108 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 7109 for (i=1;i<n_neighs;i++) 7110 for (j=0;j<n_shared[i];j++) 7111 count[shared[i][j]] += 1; 7112 7113 xadj_count = 0; 7114 for (i=1;i<n_neighs;i++) { 7115 for (j=0;j<n_shared[i];j++) { 7116 if (count[shared[i][j]] < threshold) { 7117 adjncy[xadj_count] = neighs[i]; 7118 adjncy_wgt[xadj_count] = n_shared[i]; 7119 xadj_count++; 7120 break; 7121 } 7122 } 7123 } 7124 xadj[1] = xadj_count; 7125 ierr = PetscFree(count);CHKERRQ(ierr); 7126 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7127 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7128 7129 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 7130 7131 /* Restrict work on active processes only */ 7132 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 7133 if (void_procs) { 7134 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 7135 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 7136 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 7137 subcomm = PetscSubcommChild(psubcomm); 7138 } else { 7139 psubcomm = NULL; 7140 subcomm = PetscObjectComm((PetscObject)mat); 7141 } 7142 7143 v_wgt = NULL; 7144 if (!color) { 7145 ierr = PetscFree(xadj);CHKERRQ(ierr); 7146 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7147 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7148 } else { 7149 Mat subdomain_adj; 7150 IS new_ranks,new_ranks_contig; 7151 MatPartitioning partitioner; 7152 PetscInt rstart=0,rend=0; 7153 PetscInt *is_indices,*oldranks; 7154 PetscMPIInt size; 7155 PetscBool aggregate; 7156 7157 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 7158 if (void_procs) { 7159 PetscInt prank = rank; 7160 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 7161 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 7162 for (i=0;i<xadj[1];i++) { 7163 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 7164 } 7165 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7166 } else { 7167 oldranks = NULL; 7168 } 7169 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7170 if (aggregate) { /* TODO: all this part could be made more efficient */ 7171 PetscInt lrows,row,ncols,*cols; 7172 PetscMPIInt nrank; 7173 PetscScalar *vals; 7174 7175 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 7176 lrows = 0; 7177 if (nrank<redprocs) { 7178 lrows = size/redprocs; 7179 if (nrank<size%redprocs) lrows++; 7180 } 7181 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 7182 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 7183 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7184 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7185 row = nrank; 7186 ncols = xadj[1]-xadj[0]; 7187 cols = adjncy; 7188 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 7189 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7190 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 7191 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7192 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7193 ierr = PetscFree(xadj);CHKERRQ(ierr); 7194 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7195 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7196 ierr = PetscFree(vals);CHKERRQ(ierr); 7197 if (use_vwgt) { 7198 Vec v; 7199 const PetscScalar *array; 7200 PetscInt nl; 7201 7202 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 7203 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 7204 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 7205 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 7206 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 7207 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 7208 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 7209 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7210 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 7211 ierr = VecDestroy(&v);CHKERRQ(ierr); 7212 } 7213 } else { 7214 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 7215 if (use_vwgt) { 7216 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 7217 v_wgt[0] = n; 7218 } 7219 } 7220 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 7221 7222 /* Partition */ 7223 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 7224 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 7225 if (v_wgt) { 7226 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 7227 } 7228 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7229 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 7230 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 7231 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 7232 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 7233 7234 /* renumber new_ranks to avoid "holes" in new set of processors */ 7235 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 7236 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 7237 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7238 if (!aggregate) { 7239 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7240 #if defined(PETSC_USE_DEBUG) 7241 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7242 #endif 7243 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7244 } else if (oldranks) { 7245 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7246 } else { 7247 ranks_send_to_idx[0] = is_indices[0]; 7248 } 7249 } else { 7250 PetscInt idx = 0; 7251 PetscMPIInt tag; 7252 MPI_Request *reqs; 7253 7254 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 7255 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 7256 for (i=rstart;i<rend;i++) { 7257 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 7258 } 7259 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 7260 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7261 ierr = PetscFree(reqs);CHKERRQ(ierr); 7262 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7263 #if defined(PETSC_USE_DEBUG) 7264 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7265 #endif 7266 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7267 } else if (oldranks) { 7268 ranks_send_to_idx[0] = oldranks[idx]; 7269 } else { 7270 ranks_send_to_idx[0] = idx; 7271 } 7272 } 7273 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7274 /* clean up */ 7275 ierr = PetscFree(oldranks);CHKERRQ(ierr); 7276 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 7277 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 7278 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7279 } 7280 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7281 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7282 7283 /* assemble parallel IS for sends */ 7284 i = 1; 7285 if (!color) i=0; 7286 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7287 PetscFunctionReturn(0); 7288 } 7289 7290 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7291 7292 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7293 { 7294 Mat local_mat; 7295 IS is_sends_internal; 7296 PetscInt rows,cols,new_local_rows; 7297 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7298 PetscBool ismatis,isdense,newisdense,destroy_mat; 7299 ISLocalToGlobalMapping l2gmap; 7300 PetscInt* l2gmap_indices; 7301 const PetscInt* is_indices; 7302 MatType new_local_type; 7303 /* buffers */ 7304 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7305 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7306 PetscInt *recv_buffer_idxs_local; 7307 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 7308 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7309 /* MPI */ 7310 MPI_Comm comm,comm_n; 7311 PetscSubcomm subcomm; 7312 PetscMPIInt n_sends,n_recvs,commsize; 7313 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7314 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7315 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7316 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7317 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7318 PetscErrorCode ierr; 7319 7320 PetscFunctionBegin; 7321 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7322 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7323 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7324 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7325 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7326 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7327 PetscValidLogicalCollectiveBool(mat,reuse,6); 7328 PetscValidLogicalCollectiveInt(mat,nis,8); 7329 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7330 if (nvecs) { 7331 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7332 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7333 } 7334 /* further checks */ 7335 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7336 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7337 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7338 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7339 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7340 if (reuse && *mat_n) { 7341 PetscInt mrows,mcols,mnrows,mncols; 7342 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7343 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7344 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7345 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7346 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7347 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7348 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7349 } 7350 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7351 PetscValidLogicalCollectiveInt(mat,bs,0); 7352 7353 /* prepare IS for sending if not provided */ 7354 if (!is_sends) { 7355 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7356 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7357 } else { 7358 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7359 is_sends_internal = is_sends; 7360 } 7361 7362 /* get comm */ 7363 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7364 7365 /* compute number of sends */ 7366 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7367 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7368 7369 /* compute number of receives */ 7370 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 7371 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 7372 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 7373 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7374 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7375 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7376 ierr = PetscFree(iflags);CHKERRQ(ierr); 7377 7378 /* restrict comm if requested */ 7379 subcomm = 0; 7380 destroy_mat = PETSC_FALSE; 7381 if (restrict_comm) { 7382 PetscMPIInt color,subcommsize; 7383 7384 color = 0; 7385 if (restrict_full) { 7386 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7387 } else { 7388 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7389 } 7390 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7391 subcommsize = commsize - subcommsize; 7392 /* check if reuse has been requested */ 7393 if (reuse) { 7394 if (*mat_n) { 7395 PetscMPIInt subcommsize2; 7396 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7397 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7398 comm_n = PetscObjectComm((PetscObject)*mat_n); 7399 } else { 7400 comm_n = PETSC_COMM_SELF; 7401 } 7402 } else { /* MAT_INITIAL_MATRIX */ 7403 PetscMPIInt rank; 7404 7405 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7406 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7407 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7408 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7409 comm_n = PetscSubcommChild(subcomm); 7410 } 7411 /* flag to destroy *mat_n if not significative */ 7412 if (color) destroy_mat = PETSC_TRUE; 7413 } else { 7414 comm_n = comm; 7415 } 7416 7417 /* prepare send/receive buffers */ 7418 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 7419 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 7420 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 7421 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 7422 if (nis) { 7423 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 7424 } 7425 7426 /* Get data from local matrices */ 7427 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7428 /* TODO: See below some guidelines on how to prepare the local buffers */ 7429 /* 7430 send_buffer_vals should contain the raw values of the local matrix 7431 send_buffer_idxs should contain: 7432 - MatType_PRIVATE type 7433 - PetscInt size_of_l2gmap 7434 - PetscInt global_row_indices[size_of_l2gmap] 7435 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7436 */ 7437 else { 7438 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7439 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7440 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7441 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7442 send_buffer_idxs[1] = i; 7443 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7444 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 7445 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7446 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7447 for (i=0;i<n_sends;i++) { 7448 ilengths_vals[is_indices[i]] = len*len; 7449 ilengths_idxs[is_indices[i]] = len+2; 7450 } 7451 } 7452 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7453 /* additional is (if any) */ 7454 if (nis) { 7455 PetscMPIInt psum; 7456 PetscInt j; 7457 for (j=0,psum=0;j<nis;j++) { 7458 PetscInt plen; 7459 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7460 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7461 psum += len+1; /* indices + lenght */ 7462 } 7463 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7464 for (j=0,psum=0;j<nis;j++) { 7465 PetscInt plen; 7466 const PetscInt *is_array_idxs; 7467 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7468 send_buffer_idxs_is[psum] = plen; 7469 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7470 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 7471 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7472 psum += plen+1; /* indices + lenght */ 7473 } 7474 for (i=0;i<n_sends;i++) { 7475 ilengths_idxs_is[is_indices[i]] = psum; 7476 } 7477 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7478 } 7479 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7480 7481 buf_size_idxs = 0; 7482 buf_size_vals = 0; 7483 buf_size_idxs_is = 0; 7484 buf_size_vecs = 0; 7485 for (i=0;i<n_recvs;i++) { 7486 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7487 buf_size_vals += (PetscInt)olengths_vals[i]; 7488 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7489 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7490 } 7491 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7492 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7493 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7494 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7495 7496 /* get new tags for clean communications */ 7497 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7498 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7499 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7500 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7501 7502 /* allocate for requests */ 7503 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7504 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7505 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7506 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7507 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7508 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7509 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7510 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7511 7512 /* communications */ 7513 ptr_idxs = recv_buffer_idxs; 7514 ptr_vals = recv_buffer_vals; 7515 ptr_idxs_is = recv_buffer_idxs_is; 7516 ptr_vecs = recv_buffer_vecs; 7517 for (i=0;i<n_recvs;i++) { 7518 source_dest = onodes[i]; 7519 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7520 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7521 ptr_idxs += olengths_idxs[i]; 7522 ptr_vals += olengths_vals[i]; 7523 if (nis) { 7524 source_dest = onodes_is[i]; 7525 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7526 ptr_idxs_is += olengths_idxs_is[i]; 7527 } 7528 if (nvecs) { 7529 source_dest = onodes[i]; 7530 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7531 ptr_vecs += olengths_idxs[i]-2; 7532 } 7533 } 7534 for (i=0;i<n_sends;i++) { 7535 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7536 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7537 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7538 if (nis) { 7539 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7540 } 7541 if (nvecs) { 7542 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7543 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7544 } 7545 } 7546 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7547 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7548 7549 /* assemble new l2g map */ 7550 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7551 ptr_idxs = recv_buffer_idxs; 7552 new_local_rows = 0; 7553 for (i=0;i<n_recvs;i++) { 7554 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7555 ptr_idxs += olengths_idxs[i]; 7556 } 7557 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7558 ptr_idxs = recv_buffer_idxs; 7559 new_local_rows = 0; 7560 for (i=0;i<n_recvs;i++) { 7561 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7562 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7563 ptr_idxs += olengths_idxs[i]; 7564 } 7565 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7566 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7567 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7568 7569 /* infer new local matrix type from received local matrices type */ 7570 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7571 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7572 if (n_recvs) { 7573 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7574 ptr_idxs = recv_buffer_idxs; 7575 for (i=0;i<n_recvs;i++) { 7576 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7577 new_local_type_private = MATAIJ_PRIVATE; 7578 break; 7579 } 7580 ptr_idxs += olengths_idxs[i]; 7581 } 7582 switch (new_local_type_private) { 7583 case MATDENSE_PRIVATE: 7584 new_local_type = MATSEQAIJ; 7585 bs = 1; 7586 break; 7587 case MATAIJ_PRIVATE: 7588 new_local_type = MATSEQAIJ; 7589 bs = 1; 7590 break; 7591 case MATBAIJ_PRIVATE: 7592 new_local_type = MATSEQBAIJ; 7593 break; 7594 case MATSBAIJ_PRIVATE: 7595 new_local_type = MATSEQSBAIJ; 7596 break; 7597 default: 7598 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7599 break; 7600 } 7601 } else { /* by default, new_local_type is seqaij */ 7602 new_local_type = MATSEQAIJ; 7603 bs = 1; 7604 } 7605 7606 /* create MATIS object if needed */ 7607 if (!reuse) { 7608 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7609 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7610 } else { 7611 /* it also destroys the local matrices */ 7612 if (*mat_n) { 7613 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7614 } else { /* this is a fake object */ 7615 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7616 } 7617 } 7618 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7619 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7620 7621 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7622 7623 /* Global to local map of received indices */ 7624 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7625 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7626 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7627 7628 /* restore attributes -> type of incoming data and its size */ 7629 buf_size_idxs = 0; 7630 for (i=0;i<n_recvs;i++) { 7631 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7632 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7633 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7634 } 7635 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7636 7637 /* set preallocation */ 7638 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7639 if (!newisdense) { 7640 PetscInt *new_local_nnz=0; 7641 7642 ptr_idxs = recv_buffer_idxs_local; 7643 if (n_recvs) { 7644 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7645 } 7646 for (i=0;i<n_recvs;i++) { 7647 PetscInt j; 7648 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7649 for (j=0;j<*(ptr_idxs+1);j++) { 7650 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7651 } 7652 } else { 7653 /* TODO */ 7654 } 7655 ptr_idxs += olengths_idxs[i]; 7656 } 7657 if (new_local_nnz) { 7658 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7659 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7660 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7661 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7662 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7663 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7664 } else { 7665 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7666 } 7667 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7668 } else { 7669 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7670 } 7671 7672 /* set values */ 7673 ptr_vals = recv_buffer_vals; 7674 ptr_idxs = recv_buffer_idxs_local; 7675 for (i=0;i<n_recvs;i++) { 7676 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7677 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7678 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7679 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7680 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7681 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7682 } else { 7683 /* TODO */ 7684 } 7685 ptr_idxs += olengths_idxs[i]; 7686 ptr_vals += olengths_vals[i]; 7687 } 7688 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7689 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7690 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7691 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7692 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7693 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7694 7695 #if 0 7696 if (!restrict_comm) { /* check */ 7697 Vec lvec,rvec; 7698 PetscReal infty_error; 7699 7700 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7701 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7702 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7703 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7704 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7705 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7706 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7707 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7708 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7709 } 7710 #endif 7711 7712 /* assemble new additional is (if any) */ 7713 if (nis) { 7714 PetscInt **temp_idxs,*count_is,j,psum; 7715 7716 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7717 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7718 ptr_idxs = recv_buffer_idxs_is; 7719 psum = 0; 7720 for (i=0;i<n_recvs;i++) { 7721 for (j=0;j<nis;j++) { 7722 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7723 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7724 psum += plen; 7725 ptr_idxs += plen+1; /* shift pointer to received data */ 7726 } 7727 } 7728 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7729 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7730 for (i=1;i<nis;i++) { 7731 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7732 } 7733 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7734 ptr_idxs = recv_buffer_idxs_is; 7735 for (i=0;i<n_recvs;i++) { 7736 for (j=0;j<nis;j++) { 7737 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7738 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7739 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7740 ptr_idxs += plen+1; /* shift pointer to received data */ 7741 } 7742 } 7743 for (i=0;i<nis;i++) { 7744 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7745 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7746 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7747 } 7748 ierr = PetscFree(count_is);CHKERRQ(ierr); 7749 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7750 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7751 } 7752 /* free workspace */ 7753 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7754 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7755 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7756 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7757 if (isdense) { 7758 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7759 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7760 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7761 } else { 7762 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7763 } 7764 if (nis) { 7765 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7766 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7767 } 7768 7769 if (nvecs) { 7770 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7771 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7772 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7773 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7774 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7775 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7776 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7777 /* set values */ 7778 ptr_vals = recv_buffer_vecs; 7779 ptr_idxs = recv_buffer_idxs_local; 7780 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7781 for (i=0;i<n_recvs;i++) { 7782 PetscInt j; 7783 for (j=0;j<*(ptr_idxs+1);j++) { 7784 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7785 } 7786 ptr_idxs += olengths_idxs[i]; 7787 ptr_vals += olengths_idxs[i]-2; 7788 } 7789 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7790 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7791 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7792 } 7793 7794 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7795 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7796 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7797 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7798 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7799 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7800 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7801 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7802 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7803 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7804 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7805 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7806 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7807 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7808 ierr = PetscFree(onodes);CHKERRQ(ierr); 7809 if (nis) { 7810 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7811 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7812 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7813 } 7814 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7815 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7816 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7817 for (i=0;i<nis;i++) { 7818 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7819 } 7820 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7821 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7822 } 7823 *mat_n = NULL; 7824 } 7825 PetscFunctionReturn(0); 7826 } 7827 7828 /* temporary hack into ksp private data structure */ 7829 #include <petsc/private/kspimpl.h> 7830 7831 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7832 { 7833 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7834 PC_IS *pcis = (PC_IS*)pc->data; 7835 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7836 Mat coarsedivudotp = NULL; 7837 Mat coarseG,t_coarse_mat_is; 7838 MatNullSpace CoarseNullSpace = NULL; 7839 ISLocalToGlobalMapping coarse_islg; 7840 IS coarse_is,*isarray; 7841 PetscInt i,im_active=-1,active_procs=-1; 7842 PetscInt nis,nisdofs,nisneu,nisvert; 7843 PC pc_temp; 7844 PCType coarse_pc_type; 7845 KSPType coarse_ksp_type; 7846 PetscBool multilevel_requested,multilevel_allowed; 7847 PetscBool coarse_reuse; 7848 PetscInt ncoarse,nedcfield; 7849 PetscBool compute_vecs = PETSC_FALSE; 7850 PetscScalar *array; 7851 MatReuse coarse_mat_reuse; 7852 PetscBool restr, full_restr, have_void; 7853 PetscMPIInt commsize; 7854 PetscErrorCode ierr; 7855 7856 PetscFunctionBegin; 7857 /* Assign global numbering to coarse dofs */ 7858 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7859 PetscInt ocoarse_size; 7860 compute_vecs = PETSC_TRUE; 7861 7862 pcbddc->new_primal_space = PETSC_TRUE; 7863 ocoarse_size = pcbddc->coarse_size; 7864 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7865 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7866 /* see if we can avoid some work */ 7867 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7868 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7869 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7870 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7871 coarse_reuse = PETSC_FALSE; 7872 } else { /* we can safely reuse already computed coarse matrix */ 7873 coarse_reuse = PETSC_TRUE; 7874 } 7875 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7876 coarse_reuse = PETSC_FALSE; 7877 } 7878 /* reset any subassembling information */ 7879 if (!coarse_reuse || pcbddc->recompute_topography) { 7880 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7881 } 7882 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7883 coarse_reuse = PETSC_TRUE; 7884 } 7885 /* assemble coarse matrix */ 7886 if (coarse_reuse && pcbddc->coarse_ksp) { 7887 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7888 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7889 coarse_mat_reuse = MAT_REUSE_MATRIX; 7890 } else { 7891 coarse_mat = NULL; 7892 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7893 } 7894 7895 /* creates temporary l2gmap and IS for coarse indexes */ 7896 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7897 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7898 7899 /* creates temporary MATIS object for coarse matrix */ 7900 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7901 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7902 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7903 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7904 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7905 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7906 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7907 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7908 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7909 7910 /* count "active" (i.e. with positive local size) and "void" processes */ 7911 im_active = !!(pcis->n); 7912 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7913 7914 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7915 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7916 /* full_restr : just use the receivers from the subassembling pattern */ 7917 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7918 coarse_mat_is = NULL; 7919 multilevel_allowed = PETSC_FALSE; 7920 multilevel_requested = PETSC_FALSE; 7921 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7922 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7923 if (multilevel_requested) { 7924 ncoarse = active_procs/pcbddc->coarsening_ratio; 7925 restr = PETSC_FALSE; 7926 full_restr = PETSC_FALSE; 7927 } else { 7928 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7929 restr = PETSC_TRUE; 7930 full_restr = PETSC_TRUE; 7931 } 7932 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7933 ncoarse = PetscMax(1,ncoarse); 7934 if (!pcbddc->coarse_subassembling) { 7935 if (pcbddc->coarsening_ratio > 1) { 7936 if (multilevel_requested) { 7937 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7938 } else { 7939 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7940 } 7941 } else { 7942 PetscMPIInt rank; 7943 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7944 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7945 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7946 } 7947 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7948 PetscInt psum; 7949 if (pcbddc->coarse_ksp) psum = 1; 7950 else psum = 0; 7951 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7952 if (ncoarse < commsize) have_void = PETSC_TRUE; 7953 } 7954 /* determine if we can go multilevel */ 7955 if (multilevel_requested) { 7956 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7957 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7958 } 7959 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7960 7961 /* dump subassembling pattern */ 7962 if (pcbddc->dbg_flag && multilevel_allowed) { 7963 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7964 } 7965 7966 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7967 nedcfield = -1; 7968 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7969 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7970 const PetscInt *idxs; 7971 ISLocalToGlobalMapping tmap; 7972 7973 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7974 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7975 /* allocate space for temporary storage */ 7976 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7977 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7978 /* allocate for IS array */ 7979 nisdofs = pcbddc->n_ISForDofsLocal; 7980 if (pcbddc->nedclocal) { 7981 if (pcbddc->nedfield > -1) { 7982 nedcfield = pcbddc->nedfield; 7983 } else { 7984 nedcfield = 0; 7985 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7986 nisdofs = 1; 7987 } 7988 } 7989 nisneu = !!pcbddc->NeumannBoundariesLocal; 7990 nisvert = 0; /* nisvert is not used */ 7991 nis = nisdofs + nisneu + nisvert; 7992 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7993 /* dofs splitting */ 7994 for (i=0;i<nisdofs;i++) { 7995 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7996 if (nedcfield != i) { 7997 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7998 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7999 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8000 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8001 } else { 8002 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 8003 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8004 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8005 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 8006 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8007 } 8008 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8009 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 8010 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 8011 } 8012 /* neumann boundaries */ 8013 if (pcbddc->NeumannBoundariesLocal) { 8014 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 8015 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 8016 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8017 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8018 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8019 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8020 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 8021 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 8022 } 8023 /* free memory */ 8024 ierr = PetscFree(tidxs);CHKERRQ(ierr); 8025 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 8026 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 8027 } else { 8028 nis = 0; 8029 nisdofs = 0; 8030 nisneu = 0; 8031 nisvert = 0; 8032 isarray = NULL; 8033 } 8034 /* destroy no longer needed map */ 8035 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 8036 8037 /* subassemble */ 8038 if (multilevel_allowed) { 8039 Vec vp[1]; 8040 PetscInt nvecs = 0; 8041 PetscBool reuse,reuser; 8042 8043 if (coarse_mat) reuse = PETSC_TRUE; 8044 else reuse = PETSC_FALSE; 8045 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8046 vp[0] = NULL; 8047 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8048 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 8049 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 8050 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 8051 nvecs = 1; 8052 8053 if (pcbddc->divudotp) { 8054 Mat B,loc_divudotp; 8055 Vec v,p; 8056 IS dummy; 8057 PetscInt np; 8058 8059 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 8060 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 8061 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 8062 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 8063 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 8064 ierr = VecSet(p,1.);CHKERRQ(ierr); 8065 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 8066 ierr = VecDestroy(&p);CHKERRQ(ierr); 8067 ierr = MatDestroy(&B);CHKERRQ(ierr); 8068 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 8069 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 8070 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 8071 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 8072 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 8073 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8074 ierr = VecDestroy(&v);CHKERRQ(ierr); 8075 } 8076 } 8077 if (reuser) { 8078 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8079 } else { 8080 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8081 } 8082 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8083 PetscScalar *arraym,*arrayv; 8084 PetscInt nl; 8085 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 8086 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 8087 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8088 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 8089 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 8090 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 8091 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8092 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 8093 } else { 8094 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 8095 } 8096 } else { 8097 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 8098 } 8099 if (coarse_mat_is || coarse_mat) { 8100 PetscMPIInt size; 8101 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 8102 if (!multilevel_allowed) { 8103 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 8104 } else { 8105 Mat A; 8106 8107 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8108 if (coarse_mat_is) { 8109 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8110 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 8111 coarse_mat = coarse_mat_is; 8112 } 8113 /* be sure we don't have MatSeqDENSE as local mat */ 8114 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 8115 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 8116 } 8117 } 8118 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 8119 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 8120 8121 /* create local to global scatters for coarse problem */ 8122 if (compute_vecs) { 8123 PetscInt lrows; 8124 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 8125 if (coarse_mat) { 8126 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 8127 } else { 8128 lrows = 0; 8129 } 8130 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 8131 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 8132 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 8133 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8134 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8135 } 8136 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 8137 8138 /* set defaults for coarse KSP and PC */ 8139 if (multilevel_allowed) { 8140 coarse_ksp_type = KSPRICHARDSON; 8141 coarse_pc_type = PCBDDC; 8142 } else { 8143 coarse_ksp_type = KSPPREONLY; 8144 coarse_pc_type = PCREDUNDANT; 8145 } 8146 8147 /* print some info if requested */ 8148 if (pcbddc->dbg_flag) { 8149 if (!multilevel_allowed) { 8150 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8151 if (multilevel_requested) { 8152 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8153 } else if (pcbddc->max_levels) { 8154 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 8155 } 8156 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8157 } 8158 } 8159 8160 /* communicate coarse discrete gradient */ 8161 coarseG = NULL; 8162 if (pcbddc->nedcG && multilevel_allowed) { 8163 MPI_Comm ccomm; 8164 if (coarse_mat) { 8165 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8166 } else { 8167 ccomm = MPI_COMM_NULL; 8168 } 8169 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 8170 } 8171 8172 /* create the coarse KSP object only once with defaults */ 8173 if (coarse_mat) { 8174 PetscBool isredundant,isnn,isbddc; 8175 PetscViewer dbg_viewer = NULL; 8176 8177 if (pcbddc->dbg_flag) { 8178 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8179 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8180 } 8181 if (!pcbddc->coarse_ksp) { 8182 char prefix[256],str_level[16]; 8183 size_t len; 8184 8185 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 8186 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8187 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 8188 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 8189 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8190 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 8191 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 8192 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8193 /* TODO is this logic correct? should check for coarse_mat type */ 8194 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8195 /* prefix */ 8196 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 8197 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 8198 if (!pcbddc->current_level) { 8199 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));CHKERRQ(ierr); 8200 ierr = PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));CHKERRQ(ierr); 8201 } else { 8202 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 8203 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8204 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8205 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8206 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 8207 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 8208 ierr = PetscStrlcat(prefix,str_level,sizeof(prefix));CHKERRQ(ierr); 8209 } 8210 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 8211 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8212 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8213 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8214 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8215 /* allow user customization */ 8216 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 8217 } 8218 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8219 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8220 if (nisdofs) { 8221 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 8222 for (i=0;i<nisdofs;i++) { 8223 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8224 } 8225 } 8226 if (nisneu) { 8227 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 8228 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 8229 } 8230 if (nisvert) { 8231 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 8232 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 8233 } 8234 if (coarseG) { 8235 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 8236 } 8237 8238 /* get some info after set from options */ 8239 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8240 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8241 if (isbddc && !multilevel_allowed) { 8242 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8243 isbddc = PETSC_FALSE; 8244 } 8245 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 8246 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 8247 if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) { 8248 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8249 isbddc = PETSC_TRUE; 8250 } 8251 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 8252 if (isredundant) { 8253 KSP inner_ksp; 8254 PC inner_pc; 8255 8256 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 8257 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 8258 } 8259 8260 /* parameters which miss an API */ 8261 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8262 if (isbddc) { 8263 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8264 8265 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8266 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8267 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8268 if (pcbddc_coarse->benign_saddle_point) { 8269 Mat coarsedivudotp_is; 8270 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8271 IS row,col; 8272 const PetscInt *gidxs; 8273 PetscInt n,st,M,N; 8274 8275 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 8276 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 8277 st = st-n; 8278 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 8279 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8280 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8281 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8282 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8283 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8284 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8285 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8286 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8287 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8288 ierr = ISDestroy(&row);CHKERRQ(ierr); 8289 ierr = ISDestroy(&col);CHKERRQ(ierr); 8290 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8291 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8292 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8293 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8294 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8295 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8296 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8297 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8298 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8299 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8300 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8301 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8302 } 8303 } 8304 8305 /* propagate symmetry info of coarse matrix */ 8306 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8307 if (pc->pmat->symmetric_set) { 8308 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8309 } 8310 if (pc->pmat->hermitian_set) { 8311 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8312 } 8313 if (pc->pmat->spd_set) { 8314 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8315 } 8316 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8317 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8318 } 8319 /* set operators */ 8320 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8321 if (pcbddc->dbg_flag) { 8322 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8323 } 8324 } 8325 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8326 ierr = PetscFree(isarray);CHKERRQ(ierr); 8327 #if 0 8328 { 8329 PetscViewer viewer; 8330 char filename[256]; 8331 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8332 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8333 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8334 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8335 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8336 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8337 } 8338 #endif 8339 8340 if (pcbddc->coarse_ksp) { 8341 Vec crhs,csol; 8342 8343 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8344 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8345 if (!csol) { 8346 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8347 } 8348 if (!crhs) { 8349 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8350 } 8351 } 8352 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8353 8354 /* compute null space for coarse solver if the benign trick has been requested */ 8355 if (pcbddc->benign_null) { 8356 8357 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8358 for (i=0;i<pcbddc->benign_n;i++) { 8359 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8360 } 8361 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8362 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8363 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8364 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8365 if (coarse_mat) { 8366 Vec nullv; 8367 PetscScalar *array,*array2; 8368 PetscInt nl; 8369 8370 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8371 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8372 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8373 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8374 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 8375 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8376 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8377 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8378 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8379 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8380 } 8381 } 8382 8383 if (pcbddc->coarse_ksp) { 8384 PetscBool ispreonly; 8385 8386 if (CoarseNullSpace) { 8387 PetscBool isnull; 8388 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8389 if (isnull) { 8390 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8391 } 8392 /* TODO: add local nullspaces (if any) */ 8393 } 8394 /* setup coarse ksp */ 8395 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8396 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8397 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8398 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8399 KSP check_ksp; 8400 KSPType check_ksp_type; 8401 PC check_pc; 8402 Vec check_vec,coarse_vec; 8403 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8404 PetscInt its; 8405 PetscBool compute_eigs; 8406 PetscReal *eigs_r,*eigs_c; 8407 PetscInt neigs; 8408 const char *prefix; 8409 8410 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8411 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8412 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8413 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8414 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8415 /* prevent from setup unneeded object */ 8416 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8417 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8418 if (ispreonly) { 8419 check_ksp_type = KSPPREONLY; 8420 compute_eigs = PETSC_FALSE; 8421 } else { 8422 check_ksp_type = KSPGMRES; 8423 compute_eigs = PETSC_TRUE; 8424 } 8425 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8426 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8427 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8428 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8429 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8430 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8431 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8432 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8433 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8434 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8435 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8436 /* create random vec */ 8437 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8438 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8439 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8440 /* solve coarse problem */ 8441 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8442 /* set eigenvalue estimation if preonly has not been requested */ 8443 if (compute_eigs) { 8444 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8445 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8446 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8447 if (neigs) { 8448 lambda_max = eigs_r[neigs-1]; 8449 lambda_min = eigs_r[0]; 8450 if (pcbddc->use_coarse_estimates) { 8451 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8452 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8453 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8454 } 8455 } 8456 } 8457 } 8458 8459 /* check coarse problem residual error */ 8460 if (pcbddc->dbg_flag) { 8461 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8462 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8463 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8464 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8465 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8466 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8467 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8468 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8469 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8470 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8471 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8472 if (CoarseNullSpace) { 8473 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8474 } 8475 if (compute_eigs) { 8476 PetscReal lambda_max_s,lambda_min_s; 8477 KSPConvergedReason reason; 8478 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8479 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8480 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8481 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8482 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8483 for (i=0;i<neigs;i++) { 8484 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8485 } 8486 } 8487 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8488 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8489 } 8490 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8491 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8492 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8493 if (compute_eigs) { 8494 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8495 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8496 } 8497 } 8498 } 8499 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8500 /* print additional info */ 8501 if (pcbddc->dbg_flag) { 8502 /* waits until all processes reaches this point */ 8503 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8504 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 8505 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8506 } 8507 8508 /* free memory */ 8509 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8510 PetscFunctionReturn(0); 8511 } 8512 8513 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8514 { 8515 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8516 PC_IS* pcis = (PC_IS*)pc->data; 8517 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8518 IS subset,subset_mult,subset_n; 8519 PetscInt local_size,coarse_size=0; 8520 PetscInt *local_primal_indices=NULL; 8521 const PetscInt *t_local_primal_indices; 8522 PetscErrorCode ierr; 8523 8524 PetscFunctionBegin; 8525 /* Compute global number of coarse dofs */ 8526 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8527 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8528 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8529 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8530 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8531 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8532 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8533 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8534 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8535 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8536 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8537 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8538 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8539 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8540 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8541 8542 /* check numbering */ 8543 if (pcbddc->dbg_flag) { 8544 PetscScalar coarsesum,*array,*array2; 8545 PetscInt i; 8546 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8547 8548 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8549 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8550 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8551 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8552 /* counter */ 8553 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8554 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8555 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8556 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8557 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8558 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8559 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8560 for (i=0;i<pcbddc->local_primal_size;i++) { 8561 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8562 } 8563 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8564 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8565 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8566 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8567 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8568 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8569 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8570 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8571 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8572 for (i=0;i<pcis->n;i++) { 8573 if (array[i] != 0.0 && array[i] != array2[i]) { 8574 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8575 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8576 set_error = PETSC_TRUE; 8577 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8578 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8579 } 8580 } 8581 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8582 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8583 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8584 for (i=0;i<pcis->n;i++) { 8585 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8586 } 8587 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8588 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8589 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8590 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8591 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8592 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8593 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8594 PetscInt *gidxs; 8595 8596 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8597 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8598 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8599 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8600 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8601 for (i=0;i<pcbddc->local_primal_size;i++) { 8602 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8603 } 8604 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8605 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8606 } 8607 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8608 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8609 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8610 } 8611 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8612 /* get back data */ 8613 *coarse_size_n = coarse_size; 8614 *local_primal_indices_n = local_primal_indices; 8615 PetscFunctionReturn(0); 8616 } 8617 8618 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8619 { 8620 IS localis_t; 8621 PetscInt i,lsize,*idxs,n; 8622 PetscScalar *vals; 8623 PetscErrorCode ierr; 8624 8625 PetscFunctionBegin; 8626 /* get indices in local ordering exploiting local to global map */ 8627 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8628 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8629 for (i=0;i<lsize;i++) vals[i] = 1.0; 8630 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8631 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8632 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8633 if (idxs) { /* multilevel guard */ 8634 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 8635 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8636 } 8637 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8638 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8639 ierr = PetscFree(vals);CHKERRQ(ierr); 8640 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8641 /* now compute set in local ordering */ 8642 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8643 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8644 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8645 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8646 for (i=0,lsize=0;i<n;i++) { 8647 if (PetscRealPart(vals[i]) > 0.5) { 8648 lsize++; 8649 } 8650 } 8651 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8652 for (i=0,lsize=0;i<n;i++) { 8653 if (PetscRealPart(vals[i]) > 0.5) { 8654 idxs[lsize++] = i; 8655 } 8656 } 8657 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8658 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8659 *localis = localis_t; 8660 PetscFunctionReturn(0); 8661 } 8662 8663 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8664 { 8665 PC_IS *pcis=(PC_IS*)pc->data; 8666 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8667 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8668 Mat S_j; 8669 PetscInt *used_xadj,*used_adjncy; 8670 PetscBool free_used_adj; 8671 PetscErrorCode ierr; 8672 8673 PetscFunctionBegin; 8674 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8675 free_used_adj = PETSC_FALSE; 8676 if (pcbddc->sub_schurs_layers == -1) { 8677 used_xadj = NULL; 8678 used_adjncy = NULL; 8679 } else { 8680 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8681 used_xadj = pcbddc->mat_graph->xadj; 8682 used_adjncy = pcbddc->mat_graph->adjncy; 8683 } else if (pcbddc->computed_rowadj) { 8684 used_xadj = pcbddc->mat_graph->xadj; 8685 used_adjncy = pcbddc->mat_graph->adjncy; 8686 } else { 8687 PetscBool flg_row=PETSC_FALSE; 8688 const PetscInt *xadj,*adjncy; 8689 PetscInt nvtxs; 8690 8691 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8692 if (flg_row) { 8693 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8694 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8695 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8696 free_used_adj = PETSC_TRUE; 8697 } else { 8698 pcbddc->sub_schurs_layers = -1; 8699 used_xadj = NULL; 8700 used_adjncy = NULL; 8701 } 8702 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8703 } 8704 } 8705 8706 /* setup sub_schurs data */ 8707 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8708 if (!sub_schurs->schur_explicit) { 8709 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8710 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8711 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8712 } else { 8713 Mat change = NULL; 8714 Vec scaling = NULL; 8715 IS change_primal = NULL, iP; 8716 PetscInt benign_n; 8717 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8718 PetscBool isseqaij,need_change = PETSC_FALSE; 8719 PetscBool discrete_harmonic = PETSC_FALSE; 8720 8721 if (!pcbddc->use_vertices && reuse_solvers) { 8722 PetscInt n_vertices; 8723 8724 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8725 reuse_solvers = (PetscBool)!n_vertices; 8726 } 8727 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8728 if (!isseqaij) { 8729 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8730 if (matis->A == pcbddc->local_mat) { 8731 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8732 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8733 } else { 8734 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8735 } 8736 } 8737 if (!pcbddc->benign_change_explicit) { 8738 benign_n = pcbddc->benign_n; 8739 } else { 8740 benign_n = 0; 8741 } 8742 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8743 We need a global reduction to avoid possible deadlocks. 8744 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8745 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8746 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8747 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8748 need_change = (PetscBool)(!need_change); 8749 } 8750 /* If the user defines additional constraints, we import them here. 8751 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8752 if (need_change) { 8753 PC_IS *pcisf; 8754 PC_BDDC *pcbddcf; 8755 PC pcf; 8756 8757 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8758 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8759 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8760 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8761 8762 /* hacks */ 8763 pcisf = (PC_IS*)pcf->data; 8764 pcisf->is_B_local = pcis->is_B_local; 8765 pcisf->vec1_N = pcis->vec1_N; 8766 pcisf->BtoNmap = pcis->BtoNmap; 8767 pcisf->n = pcis->n; 8768 pcisf->n_B = pcis->n_B; 8769 pcbddcf = (PC_BDDC*)pcf->data; 8770 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8771 pcbddcf->mat_graph = pcbddc->mat_graph; 8772 pcbddcf->use_faces = PETSC_TRUE; 8773 pcbddcf->use_change_of_basis = PETSC_TRUE; 8774 pcbddcf->use_change_on_faces = PETSC_TRUE; 8775 pcbddcf->use_qr_single = PETSC_TRUE; 8776 pcbddcf->fake_change = PETSC_TRUE; 8777 8778 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8779 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8780 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8781 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8782 change = pcbddcf->ConstraintMatrix; 8783 pcbddcf->ConstraintMatrix = NULL; 8784 8785 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8786 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8787 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8788 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8789 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8790 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8791 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8792 pcf->ops->destroy = NULL; 8793 pcf->ops->reset = NULL; 8794 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8795 } 8796 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8797 8798 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8799 if (iP) { 8800 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8801 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8802 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8803 } 8804 if (discrete_harmonic) { 8805 Mat A; 8806 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8807 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8808 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8809 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8810 ierr = MatDestroy(&A);CHKERRQ(ierr); 8811 } else { 8812 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8813 } 8814 ierr = MatDestroy(&change);CHKERRQ(ierr); 8815 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8816 } 8817 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8818 8819 /* free adjacency */ 8820 if (free_used_adj) { 8821 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8822 } 8823 PetscFunctionReturn(0); 8824 } 8825 8826 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8827 { 8828 PC_IS *pcis=(PC_IS*)pc->data; 8829 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8830 PCBDDCGraph graph; 8831 PetscErrorCode ierr; 8832 8833 PetscFunctionBegin; 8834 /* attach interface graph for determining subsets */ 8835 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8836 IS verticesIS,verticescomm; 8837 PetscInt vsize,*idxs; 8838 8839 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8840 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8841 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8842 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8843 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8844 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8845 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8846 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8847 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8848 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8849 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8850 } else { 8851 graph = pcbddc->mat_graph; 8852 } 8853 /* print some info */ 8854 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8855 IS vertices; 8856 PetscInt nv,nedges,nfaces; 8857 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8858 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8859 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8860 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8861 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8862 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8863 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8864 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8865 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8866 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8867 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8868 } 8869 8870 /* sub_schurs init */ 8871 if (!pcbddc->sub_schurs) { 8872 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8873 } 8874 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8875 8876 /* free graph struct */ 8877 if (pcbddc->sub_schurs_rebuild) { 8878 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8879 } 8880 PetscFunctionReturn(0); 8881 } 8882 8883 PetscErrorCode PCBDDCCheckOperator(PC pc) 8884 { 8885 PC_IS *pcis=(PC_IS*)pc->data; 8886 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8887 PetscErrorCode ierr; 8888 8889 PetscFunctionBegin; 8890 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8891 IS zerodiag = NULL; 8892 Mat S_j,B0_B=NULL; 8893 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8894 PetscScalar *p0_check,*array,*array2; 8895 PetscReal norm; 8896 PetscInt i; 8897 8898 /* B0 and B0_B */ 8899 if (zerodiag) { 8900 IS dummy; 8901 8902 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8903 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8904 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8905 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8906 } 8907 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8908 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8909 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8910 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8911 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8912 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8913 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8914 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8915 /* S_j */ 8916 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8917 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8918 8919 /* mimic vector in \widetilde{W}_\Gamma */ 8920 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8921 /* continuous in primal space */ 8922 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8923 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8924 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8925 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8926 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8927 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8928 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8929 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8930 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8931 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8932 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8933 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8934 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8935 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8936 8937 /* assemble rhs for coarse problem */ 8938 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8939 /* local with Schur */ 8940 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8941 if (zerodiag) { 8942 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8943 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8944 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8945 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8946 } 8947 /* sum on primal nodes the local contributions */ 8948 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8949 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8950 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8951 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8952 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8953 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8954 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8955 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8956 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8957 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8958 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8959 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8960 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8961 /* scale primal nodes (BDDC sums contibutions) */ 8962 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8963 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8964 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8965 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8966 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8967 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8968 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8969 /* global: \widetilde{B0}_B w_\Gamma */ 8970 if (zerodiag) { 8971 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8972 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8973 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8974 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8975 } 8976 /* BDDC */ 8977 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8978 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8979 8980 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8981 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8982 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8983 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8984 for (i=0;i<pcbddc->benign_n;i++) { 8985 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8986 } 8987 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8988 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8989 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8990 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8991 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8992 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8993 } 8994 PetscFunctionReturn(0); 8995 } 8996 8997 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8998 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8999 { 9000 Mat At; 9001 IS rows; 9002 PetscInt rst,ren; 9003 PetscErrorCode ierr; 9004 PetscLayout rmap; 9005 9006 PetscFunctionBegin; 9007 rst = ren = 0; 9008 if (ccomm != MPI_COMM_NULL) { 9009 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 9010 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 9011 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 9012 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 9013 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 9014 } 9015 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 9016 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 9017 ierr = ISDestroy(&rows);CHKERRQ(ierr); 9018 9019 if (ccomm != MPI_COMM_NULL) { 9020 Mat_MPIAIJ *a,*b; 9021 IS from,to; 9022 Vec gvec; 9023 PetscInt lsize; 9024 9025 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 9026 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 9027 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 9028 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 9029 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 9030 a = (Mat_MPIAIJ*)At->data; 9031 b = (Mat_MPIAIJ*)(*B)->data; 9032 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 9033 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 9034 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 9035 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 9036 b->A = a->A; 9037 b->B = a->B; 9038 9039 b->donotstash = a->donotstash; 9040 b->roworiented = a->roworiented; 9041 b->rowindices = 0; 9042 b->rowvalues = 0; 9043 b->getrowactive = PETSC_FALSE; 9044 9045 (*B)->rmap = rmap; 9046 (*B)->factortype = A->factortype; 9047 (*B)->assembled = PETSC_TRUE; 9048 (*B)->insertmode = NOT_SET_VALUES; 9049 (*B)->preallocated = PETSC_TRUE; 9050 9051 if (a->colmap) { 9052 #if defined(PETSC_USE_CTABLE) 9053 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 9054 #else 9055 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 9056 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9057 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9058 #endif 9059 } else b->colmap = 0; 9060 if (a->garray) { 9061 PetscInt len; 9062 len = a->B->cmap->n; 9063 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 9064 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 9065 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 9066 } else b->garray = 0; 9067 9068 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 9069 b->lvec = a->lvec; 9070 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 9071 9072 /* cannot use VecScatterCopy */ 9073 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 9074 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 9075 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 9076 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 9077 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 9078 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 9079 ierr = ISDestroy(&from);CHKERRQ(ierr); 9080 ierr = ISDestroy(&to);CHKERRQ(ierr); 9081 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 9082 } 9083 ierr = MatDestroy(&At);CHKERRQ(ierr); 9084 PetscFunctionReturn(0); 9085 } 9086