1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 #if !defined(PETSC_USE_COMPLEX) 18 PetscScalar *uwork,*data,*U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM,bN,lwork,lierr,di = 1; 21 PetscInt ulw,i,nr,nc,n; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 #if defined(PETSC_MISSING_LAPACK_GESVD) 26 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 27 #else 28 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 29 if (!nr || !nc) PetscFunctionReturn(0); 30 31 /* workspace */ 32 if (!work) { 33 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 34 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 35 } else { 36 ulw = lw; 37 uwork = work; 38 } 39 n = PetscMin(nr,nc); 40 if (!rwork) { 41 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 42 } else { 43 sing = rwork; 44 } 45 46 /* SVD */ 47 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 50 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 51 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 52 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 ierr = PetscFPTrapPop();CHKERRQ(ierr); 55 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 56 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 57 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 58 if (!rwork) { 59 ierr = PetscFree(sing);CHKERRQ(ierr); 60 } 61 if (!work) { 62 ierr = PetscFree(uwork);CHKERRQ(ierr); 63 } 64 /* create B */ 65 if (!range) { 66 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 67 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 68 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 69 } else { 70 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 71 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 73 } 74 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 75 ierr = PetscFree(U);CHKERRQ(ierr); 76 #endif 77 #else /* PETSC_USE_COMPLEX */ 78 PetscFunctionBegin; 79 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 80 #endif 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 PetscErrorCode PCBDDCNedelecSupport(PC pc) 156 { 157 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 158 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 159 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 160 Vec tvec; 161 PetscSF sfv; 162 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 163 MPI_Comm comm; 164 IS lned,primals,allprimals,nedfieldlocal; 165 IS *eedges,*extrows,*extcols,*alleedges; 166 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 167 PetscScalar *vals,*work; 168 PetscReal *rwork; 169 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 170 PetscInt ne,nv,Lv,order,n,field; 171 PetscInt n_neigh,*neigh,*n_shared,**shared; 172 PetscInt i,j,extmem,cum,maxsize,nee; 173 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 174 PetscInt *sfvleaves,*sfvroots; 175 PetscInt *corners,*cedges; 176 PetscInt *ecount,**eneighs,*vcount,**vneighs; 177 #if defined(PETSC_USE_DEBUG) 178 PetscInt *emarks; 179 #endif 180 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 /* If the discrete gradient is defined for a subset of dofs and global is true, 185 it assumes G is given in global ordering for all the dofs. 186 Otherwise, the ordering is global for the Nedelec field */ 187 order = pcbddc->nedorder; 188 conforming = pcbddc->conforming; 189 field = pcbddc->nedfield; 190 global = pcbddc->nedglobal; 191 setprimal = PETSC_FALSE; 192 print = PETSC_FALSE; 193 singular = PETSC_FALSE; 194 195 /* Command line customization */ 196 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 199 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 200 /* print debug info TODO: to be removed */ 201 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 202 ierr = PetscOptionsEnd();CHKERRQ(ierr); 203 204 /* Return if there are no edges in the decomposition and the problem is not singular */ 205 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 206 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 207 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 208 if (!singular) { 209 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 210 lrc[0] = PETSC_FALSE; 211 for (i=0;i<n;i++) { 212 if (PetscRealPart(vals[i]) > 2.) { 213 lrc[0] = PETSC_TRUE; 214 break; 215 } 216 } 217 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 218 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 219 if (!lrc[1]) PetscFunctionReturn(0); 220 } 221 222 /* Get Nedelec field */ 223 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 224 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 225 if (pcbddc->n_ISForDofsLocal && field >= 0) { 226 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 227 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 228 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 229 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 230 ne = n; 231 nedfieldlocal = NULL; 232 global = PETSC_TRUE; 233 } else if (field == PETSC_DECIDE) { 234 PetscInt rst,ren,*idx; 235 236 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 237 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 238 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 239 for (i=rst;i<ren;i++) { 240 PetscInt nc; 241 242 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 243 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 244 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 245 } 246 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 247 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 248 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 249 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 250 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 251 } else { 252 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 253 } 254 255 /* Sanity checks */ 256 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 257 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 258 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 259 260 /* Just set primal dofs and return */ 261 if (setprimal) { 262 IS enedfieldlocal; 263 PetscInt *eidxs; 264 265 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 266 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 267 if (nedfieldlocal) { 268 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 269 for (i=0,cum=0;i<ne;i++) { 270 if (PetscRealPart(vals[idxs[i]]) > 2.) { 271 eidxs[cum++] = idxs[i]; 272 } 273 } 274 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 275 } else { 276 for (i=0,cum=0;i<ne;i++) { 277 if (PetscRealPart(vals[i]) > 2.) { 278 eidxs[cum++] = i; 279 } 280 } 281 } 282 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 283 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 284 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 285 ierr = PetscFree(eidxs);CHKERRQ(ierr); 286 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 287 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 288 PetscFunctionReturn(0); 289 } 290 291 /* Compute some l2g maps */ 292 if (nedfieldlocal) { 293 IS is; 294 295 /* need to map from the local Nedelec field to local numbering */ 296 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 297 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 298 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 299 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 300 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 301 if (global) { 302 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 303 el2g = al2g; 304 } else { 305 IS gis; 306 307 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 308 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 309 ierr = ISDestroy(&gis);CHKERRQ(ierr); 310 } 311 ierr = ISDestroy(&is);CHKERRQ(ierr); 312 } else { 313 /* restore default */ 314 pcbddc->nedfield = -1; 315 /* one ref for the destruction of al2g, one for el2g */ 316 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 317 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 318 el2g = al2g; 319 fl2g = NULL; 320 } 321 322 /* Start communication to drop connections for interior edges (for cc analysis only) */ 323 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 324 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 325 if (nedfieldlocal) { 326 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 327 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 328 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 329 } else { 330 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 331 } 332 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 333 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 334 335 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 336 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 337 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 338 if (global) { 339 PetscInt rst; 340 341 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 342 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 343 if (matis->sf_rootdata[i] < 2) { 344 matis->sf_rootdata[cum++] = i + rst; 345 } 346 } 347 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 348 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 349 } else { 350 PetscInt *tbz; 351 352 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 353 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 354 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 355 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 356 for (i=0,cum=0;i<ne;i++) 357 if (matis->sf_leafdata[idxs[i]] == 1) 358 tbz[cum++] = i; 359 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 360 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 361 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 362 ierr = PetscFree(tbz);CHKERRQ(ierr); 363 } 364 } else { /* we need the entire G to infer the nullspace */ 365 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 366 G = pcbddc->discretegradient; 367 } 368 369 /* Extract subdomain relevant rows of G */ 370 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 371 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 372 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 373 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 374 ierr = ISDestroy(&lned);CHKERRQ(ierr); 375 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 376 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 377 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 378 379 /* SF for nodal dofs communications */ 380 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 381 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 382 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 384 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 386 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 387 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 388 i = singular ? 2 : 1; 389 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 390 391 /* Destroy temporary G created in MATIS format and modified G */ 392 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 393 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 394 ierr = MatDestroy(&G);CHKERRQ(ierr); 395 396 if (print) { 397 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 398 ierr = MatView(lG,NULL);CHKERRQ(ierr); 399 } 400 401 /* Save lG for values insertion in change of basis */ 402 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 403 404 /* Analyze the edge-nodes connections (duplicate lG) */ 405 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 406 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 407 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 409 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 410 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 411 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 412 /* need to import the boundary specification to ensure the 413 proper detection of coarse edges' endpoints */ 414 if (pcbddc->DirichletBoundariesLocal) { 415 IS is; 416 417 if (fl2g) { 418 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 419 } else { 420 is = pcbddc->DirichletBoundariesLocal; 421 } 422 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 423 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 424 for (i=0;i<cum;i++) { 425 if (idxs[i] >= 0) { 426 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 427 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 428 } 429 } 430 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 431 if (fl2g) { 432 ierr = ISDestroy(&is);CHKERRQ(ierr); 433 } 434 } 435 if (pcbddc->NeumannBoundariesLocal) { 436 IS is; 437 438 if (fl2g) { 439 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 440 } else { 441 is = pcbddc->NeumannBoundariesLocal; 442 } 443 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 444 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 445 for (i=0;i<cum;i++) { 446 if (idxs[i] >= 0) { 447 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 448 } 449 } 450 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 451 if (fl2g) { 452 ierr = ISDestroy(&is);CHKERRQ(ierr); 453 } 454 } 455 456 /* Count neighs per dof */ 457 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 458 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 459 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 460 for (i=1,cum=0;i<n_neigh;i++) { 461 cum += n_shared[i]; 462 for (j=0;j<n_shared[i];j++) { 463 ecount[shared[i][j]]++; 464 } 465 } 466 if (ne) { 467 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 468 } 469 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 470 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 471 for (i=1;i<n_neigh;i++) { 472 for (j=0;j<n_shared[i];j++) { 473 PetscInt k = shared[i][j]; 474 eneighs[k][ecount[k]] = neigh[i]; 475 ecount[k]++; 476 } 477 } 478 for (i=0;i<ne;i++) { 479 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 480 } 481 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 482 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 483 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 484 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 485 for (i=1,cum=0;i<n_neigh;i++) { 486 cum += n_shared[i]; 487 for (j=0;j<n_shared[i];j++) { 488 vcount[shared[i][j]]++; 489 } 490 } 491 if (nv) { 492 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 493 } 494 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 495 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 496 for (i=1;i<n_neigh;i++) { 497 for (j=0;j<n_shared[i];j++) { 498 PetscInt k = shared[i][j]; 499 vneighs[k][vcount[k]] = neigh[i]; 500 vcount[k]++; 501 } 502 } 503 for (i=0;i<nv;i++) { 504 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 505 } 506 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 507 508 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 509 for proper detection of coarse edges' endpoints */ 510 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 511 for (i=0;i<ne;i++) { 512 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 513 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 514 } 515 } 516 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 517 if (!conforming) { 518 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 519 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 520 } 521 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 522 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 523 cum = 0; 524 for (i=0;i<ne;i++) { 525 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 526 if (!PetscBTLookup(btee,i)) { 527 marks[cum++] = i; 528 continue; 529 } 530 /* set badly connected edge dofs as primal */ 531 if (!conforming) { 532 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 533 marks[cum++] = i; 534 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 535 for (j=ii[i];j<ii[i+1];j++) { 536 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 537 } 538 } else { 539 /* every edge dofs should be connected trough a certain number of nodal dofs 540 to other edge dofs belonging to coarse edges 541 - at most 2 endpoints 542 - order-1 interior nodal dofs 543 - no undefined nodal dofs (nconn < order) 544 */ 545 PetscInt ends = 0,ints = 0, undef = 0; 546 for (j=ii[i];j<ii[i+1];j++) { 547 PetscInt v = jj[j],k; 548 PetscInt nconn = iit[v+1]-iit[v]; 549 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 550 if (nconn > order) ends++; 551 else if (nconn == order) ints++; 552 else undef++; 553 } 554 if (undef || ends > 2 || ints != order -1) { 555 marks[cum++] = i; 556 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 557 for (j=ii[i];j<ii[i+1];j++) { 558 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 559 } 560 } 561 } 562 } 563 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 564 if (!order && ii[i+1] != ii[i]) { 565 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 566 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 567 } 568 } 569 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 570 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 571 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 572 if (!conforming) { 573 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 574 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 575 } 576 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 577 578 /* identify splitpoints and corner candidates */ 579 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 580 if (print) { 581 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 582 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 583 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 584 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 585 } 586 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 587 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 588 for (i=0;i<nv;i++) { 589 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 590 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 591 if (!order) { /* variable order */ 592 PetscReal vorder = 0.; 593 594 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 595 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 596 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 597 ord = 1; 598 } 599 #if defined(PETSC_USE_DEBUG) 600 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 601 #endif 602 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 603 if (PetscBTLookup(btbd,jj[j])) { 604 bdir = PETSC_TRUE; 605 break; 606 } 607 if (vc != ecount[jj[j]]) { 608 sneighs = PETSC_FALSE; 609 } else { 610 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 611 for (k=0;k<vc;k++) { 612 if (vn[k] != en[k]) { 613 sneighs = PETSC_FALSE; 614 break; 615 } 616 } 617 } 618 } 619 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 620 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 621 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 622 } else if (test == ord) { 623 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 624 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 625 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 626 } else { 627 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 628 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 629 } 630 } 631 } 632 ierr = PetscFree(ecount);CHKERRQ(ierr); 633 ierr = PetscFree(vcount);CHKERRQ(ierr); 634 if (ne) { 635 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 636 } 637 if (nv) { 638 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 639 } 640 ierr = PetscFree(eneighs);CHKERRQ(ierr); 641 ierr = PetscFree(vneighs);CHKERRQ(ierr); 642 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 643 644 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 645 if (order != 1) { 646 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 647 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 648 for (i=0;i<nv;i++) { 649 if (PetscBTLookup(btvcand,i)) { 650 PetscBool found = PETSC_FALSE; 651 for (j=ii[i];j<ii[i+1] && !found;j++) { 652 PetscInt k,e = jj[j]; 653 if (PetscBTLookup(bte,e)) continue; 654 for (k=iit[e];k<iit[e+1];k++) { 655 PetscInt v = jjt[k]; 656 if (v != i && PetscBTLookup(btvcand,v)) { 657 found = PETSC_TRUE; 658 break; 659 } 660 } 661 } 662 if (!found) { 663 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 664 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 665 } else { 666 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 667 } 668 } 669 } 670 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 671 } 672 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 673 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 674 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 675 676 /* Get the local G^T explicitly */ 677 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 678 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 679 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 680 681 /* Mark interior nodal dofs */ 682 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 683 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 684 for (i=1;i<n_neigh;i++) { 685 for (j=0;j<n_shared[i];j++) { 686 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 687 } 688 } 689 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 690 691 /* communicate corners and splitpoints */ 692 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 693 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 694 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 695 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 696 697 if (print) { 698 IS tbz; 699 700 cum = 0; 701 for (i=0;i<nv;i++) 702 if (sfvleaves[i]) 703 vmarks[cum++] = i; 704 705 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 706 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 707 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 708 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 709 } 710 711 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 712 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 713 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 714 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 715 716 /* Zero rows of lGt corresponding to identified corners 717 and interior nodal dofs */ 718 cum = 0; 719 for (i=0;i<nv;i++) { 720 if (sfvleaves[i]) { 721 vmarks[cum++] = i; 722 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 723 } 724 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 725 } 726 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 727 if (print) { 728 IS tbz; 729 730 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 731 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 732 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 733 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 734 } 735 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 736 ierr = PetscFree(vmarks);CHKERRQ(ierr); 737 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 738 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 739 740 /* Recompute G */ 741 ierr = MatDestroy(&lG);CHKERRQ(ierr); 742 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 743 if (print) { 744 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 745 ierr = MatView(lG,NULL);CHKERRQ(ierr); 746 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 747 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 748 } 749 750 /* Get primal dofs (if any) */ 751 cum = 0; 752 for (i=0;i<ne;i++) { 753 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 754 } 755 if (fl2g) { 756 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 757 } 758 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 759 if (print) { 760 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 761 ierr = ISView(primals,NULL);CHKERRQ(ierr); 762 } 763 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 764 /* TODO: what if the user passed in some of them ? */ 765 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 766 ierr = ISDestroy(&primals);CHKERRQ(ierr); 767 768 /* Compute edge connectivity */ 769 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 770 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 771 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 772 if (fl2g) { 773 PetscBT btf; 774 PetscInt *iia,*jja,*iiu,*jju; 775 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 776 777 /* create CSR for all local dofs */ 778 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 779 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 780 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 781 iiu = pcbddc->mat_graph->xadj; 782 jju = pcbddc->mat_graph->adjncy; 783 } else if (pcbddc->use_local_adj) { 784 rest = PETSC_TRUE; 785 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 786 } else { 787 free = PETSC_TRUE; 788 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 789 iiu[0] = 0; 790 for (i=0;i<n;i++) { 791 iiu[i+1] = i+1; 792 jju[i] = -1; 793 } 794 } 795 796 /* import sizes of CSR */ 797 iia[0] = 0; 798 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 799 800 /* overwrite entries corresponding to the Nedelec field */ 801 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 802 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 803 for (i=0;i<ne;i++) { 804 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 805 iia[idxs[i]+1] = ii[i+1]-ii[i]; 806 } 807 808 /* iia in CSR */ 809 for (i=0;i<n;i++) iia[i+1] += iia[i]; 810 811 /* jja in CSR */ 812 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 813 for (i=0;i<n;i++) 814 if (!PetscBTLookup(btf,i)) 815 for (j=0;j<iiu[i+1]-iiu[i];j++) 816 jja[iia[i]+j] = jju[iiu[i]+j]; 817 818 /* map edge dofs connectivity */ 819 if (jj) { 820 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 821 for (i=0;i<ne;i++) { 822 PetscInt e = idxs[i]; 823 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 824 } 825 } 826 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 827 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 828 if (rest) { 829 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 830 } 831 if (free) { 832 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 833 } 834 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 835 } else { 836 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 837 } 838 839 /* Analyze interface for edge dofs */ 840 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 841 pcbddc->mat_graph->twodim = PETSC_FALSE; 842 843 /* Get coarse edges in the edge space */ 844 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 845 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 846 847 if (fl2g) { 848 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 849 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 850 for (i=0;i<nee;i++) { 851 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 852 } 853 } else { 854 eedges = alleedges; 855 primals = allprimals; 856 } 857 858 /* Mark fine edge dofs with their coarse edge id */ 859 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 860 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 861 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 862 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 863 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 864 if (print) { 865 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 866 ierr = ISView(primals,NULL);CHKERRQ(ierr); 867 } 868 869 maxsize = 0; 870 for (i=0;i<nee;i++) { 871 PetscInt size,mark = i+1; 872 873 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 874 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 875 for (j=0;j<size;j++) marks[idxs[j]] = mark; 876 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 877 maxsize = PetscMax(maxsize,size); 878 } 879 880 /* Find coarse edge endpoints */ 881 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 882 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 883 for (i=0;i<nee;i++) { 884 PetscInt mark = i+1,size; 885 886 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 887 if (!size && nedfieldlocal) continue; 888 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 889 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 890 if (print) { 891 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 892 ISView(eedges[i],NULL); 893 } 894 for (j=0;j<size;j++) { 895 PetscInt k, ee = idxs[j]; 896 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 897 for (k=ii[ee];k<ii[ee+1];k++) { 898 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 899 if (PetscBTLookup(btv,jj[k])) { 900 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 901 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 902 PetscInt k2; 903 PetscBool corner = PETSC_FALSE; 904 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 905 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 906 /* it's a corner if either is connected with an edge dof belonging to a different cc or 907 if the edge dof lie on the natural part of the boundary */ 908 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 909 corner = PETSC_TRUE; 910 break; 911 } 912 } 913 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 914 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 915 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 916 } else { 917 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 918 } 919 } 920 } 921 } 922 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 923 } 924 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 925 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 926 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 927 928 /* Reset marked primal dofs */ 929 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 930 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 931 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 932 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 933 934 /* Now use the initial lG */ 935 ierr = MatDestroy(&lG);CHKERRQ(ierr); 936 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 937 lG = lGinit; 938 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 939 940 /* Compute extended cols indices */ 941 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 942 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 943 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 944 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 945 i *= maxsize; 946 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 947 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 948 eerr = PETSC_FALSE; 949 for (i=0;i<nee;i++) { 950 PetscInt size,found = 0; 951 952 cum = 0; 953 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 954 if (!size && nedfieldlocal) continue; 955 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 956 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 957 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 958 for (j=0;j<size;j++) { 959 PetscInt k,ee = idxs[j]; 960 for (k=ii[ee];k<ii[ee+1];k++) { 961 PetscInt vv = jj[k]; 962 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 963 else if (!PetscBTLookupSet(btvc,vv)) found++; 964 } 965 } 966 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 967 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 968 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 969 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 970 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 971 /* it may happen that endpoints are not defined at this point 972 if it is the case, mark this edge for a second pass */ 973 if (cum != size -1 || found != 2) { 974 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 975 if (print) { 976 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 977 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 978 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 979 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 980 } 981 eerr = PETSC_TRUE; 982 } 983 } 984 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 985 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 986 if (done) { 987 PetscInt *newprimals; 988 989 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 990 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 991 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 992 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 993 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 994 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 995 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 996 for (i=0;i<nee;i++) { 997 PetscBool has_candidates = PETSC_FALSE; 998 if (PetscBTLookup(bter,i)) { 999 PetscInt size,mark = i+1; 1000 1001 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1002 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1003 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1004 for (j=0;j<size;j++) { 1005 PetscInt k,ee = idxs[j]; 1006 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1007 for (k=ii[ee];k<ii[ee+1];k++) { 1008 /* set all candidates located on the edge as corners */ 1009 if (PetscBTLookup(btvcand,jj[k])) { 1010 PetscInt k2,vv = jj[k]; 1011 has_candidates = PETSC_TRUE; 1012 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1013 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1014 /* set all edge dofs connected to candidate as primals */ 1015 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1016 if (marks[jjt[k2]] == mark) { 1017 PetscInt k3,ee2 = jjt[k2]; 1018 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1019 newprimals[cum++] = ee2; 1020 /* finally set the new corners */ 1021 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1022 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1023 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1024 } 1025 } 1026 } 1027 } else { 1028 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1029 } 1030 } 1031 } 1032 if (!has_candidates) { /* circular edge */ 1033 PetscInt k, ee = idxs[0],*tmarks; 1034 1035 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1036 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1037 for (k=ii[ee];k<ii[ee+1];k++) { 1038 PetscInt k2; 1039 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1040 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1041 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1042 } 1043 for (j=0;j<size;j++) { 1044 if (tmarks[idxs[j]] > 1) { 1045 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1046 newprimals[cum++] = idxs[j]; 1047 } 1048 } 1049 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1050 } 1051 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1052 } 1053 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1054 } 1055 ierr = PetscFree(extcols);CHKERRQ(ierr); 1056 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1057 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1058 if (fl2g) { 1059 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1060 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1061 for (i=0;i<nee;i++) { 1062 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1063 } 1064 ierr = PetscFree(eedges);CHKERRQ(ierr); 1065 } 1066 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1067 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1068 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1069 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1070 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1071 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1072 pcbddc->mat_graph->twodim = PETSC_FALSE; 1073 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1074 if (fl2g) { 1075 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1076 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1077 for (i=0;i<nee;i++) { 1078 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1079 } 1080 } else { 1081 eedges = alleedges; 1082 primals = allprimals; 1083 } 1084 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1085 1086 /* Mark again */ 1087 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1088 for (i=0;i<nee;i++) { 1089 PetscInt size,mark = i+1; 1090 1091 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1092 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1093 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1094 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1095 } 1096 if (print) { 1097 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1098 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1099 } 1100 1101 /* Recompute extended cols */ 1102 eerr = PETSC_FALSE; 1103 for (i=0;i<nee;i++) { 1104 PetscInt size; 1105 1106 cum = 0; 1107 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1108 if (!size && nedfieldlocal) continue; 1109 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1110 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1111 for (j=0;j<size;j++) { 1112 PetscInt k,ee = idxs[j]; 1113 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1114 } 1115 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1116 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1117 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1118 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1119 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1120 if (cum != size -1) { 1121 if (print) { 1122 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1123 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1125 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1126 } 1127 eerr = PETSC_TRUE; 1128 } 1129 } 1130 } 1131 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1132 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1133 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1134 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1135 /* an error should not occur at this point */ 1136 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1137 1138 /* Check the number of endpoints */ 1139 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1140 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1141 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1142 for (i=0;i<nee;i++) { 1143 PetscInt size, found = 0, gc[2]; 1144 1145 /* init with defaults */ 1146 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1147 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1148 if (!size && nedfieldlocal) continue; 1149 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1150 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1151 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1152 for (j=0;j<size;j++) { 1153 PetscInt k,ee = idxs[j]; 1154 for (k=ii[ee];k<ii[ee+1];k++) { 1155 PetscInt vv = jj[k]; 1156 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1157 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1158 corners[i*2+found++] = vv; 1159 } 1160 } 1161 } 1162 if (found != 2) { 1163 PetscInt e; 1164 if (fl2g) { 1165 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1166 } else { 1167 e = idxs[0]; 1168 } 1169 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1170 } 1171 1172 /* get primal dof index on this coarse edge */ 1173 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1174 if (gc[0] > gc[1]) { 1175 PetscInt swap = corners[2*i]; 1176 corners[2*i] = corners[2*i+1]; 1177 corners[2*i+1] = swap; 1178 } 1179 cedges[i] = idxs[size-1]; 1180 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1181 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1182 } 1183 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1184 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1185 1186 #if defined(PETSC_USE_DEBUG) 1187 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1188 not interfere with neighbouring coarse edges */ 1189 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1190 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1191 for (i=0;i<nv;i++) { 1192 PetscInt emax = 0,eemax = 0; 1193 1194 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1195 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1196 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1197 for (j=1;j<nee+1;j++) { 1198 if (emax < emarks[j]) { 1199 emax = emarks[j]; 1200 eemax = j; 1201 } 1202 } 1203 /* not relevant for edges */ 1204 if (!eemax) continue; 1205 1206 for (j=ii[i];j<ii[i+1];j++) { 1207 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1208 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1209 } 1210 } 1211 } 1212 ierr = PetscFree(emarks);CHKERRQ(ierr); 1213 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1214 #endif 1215 1216 /* Compute extended rows indices for edge blocks of the change of basis */ 1217 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1218 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1219 extmem *= maxsize; 1220 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1221 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1222 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1223 for (i=0;i<nv;i++) { 1224 PetscInt mark = 0,size,start; 1225 1226 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1227 for (j=ii[i];j<ii[i+1];j++) 1228 if (marks[jj[j]] && !mark) 1229 mark = marks[jj[j]]; 1230 1231 /* not relevant */ 1232 if (!mark) continue; 1233 1234 /* import extended row */ 1235 mark--; 1236 start = mark*extmem+extrowcum[mark]; 1237 size = ii[i+1]-ii[i]; 1238 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1239 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1240 extrowcum[mark] += size; 1241 } 1242 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1243 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1244 ierr = PetscFree(marks);CHKERRQ(ierr); 1245 1246 /* Compress extrows */ 1247 cum = 0; 1248 for (i=0;i<nee;i++) { 1249 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1250 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1251 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1252 cum = PetscMax(cum,size); 1253 } 1254 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1255 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1256 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1257 1258 /* Workspace for lapack inner calls and VecSetValues */ 1259 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1260 1261 /* Create change of basis matrix (preallocation can be improved) */ 1262 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1263 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1264 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1265 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1266 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1267 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1268 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1269 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1270 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1271 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1272 1273 /* Defaults to identity */ 1274 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1275 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1276 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1277 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1278 1279 /* Create discrete gradient for the coarser level if needed */ 1280 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1281 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1282 if (pcbddc->current_level < pcbddc->max_levels) { 1283 ISLocalToGlobalMapping cel2g,cvl2g; 1284 IS wis,gwis; 1285 PetscInt cnv,cne; 1286 1287 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1288 if (fl2g) { 1289 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1290 } else { 1291 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1292 pcbddc->nedclocal = wis; 1293 } 1294 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1295 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1296 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1297 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1298 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1299 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1300 1301 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1302 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1303 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1304 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1305 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1306 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1307 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1308 1309 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1310 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1311 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1312 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1313 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1314 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1315 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1316 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1317 } 1318 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1319 1320 #if defined(PRINT_GDET) 1321 inc = 0; 1322 lev = pcbddc->current_level; 1323 #endif 1324 1325 /* Insert values in the change of basis matrix */ 1326 for (i=0;i<nee;i++) { 1327 Mat Gins = NULL, GKins = NULL; 1328 IS cornersis = NULL; 1329 PetscScalar cvals[2]; 1330 1331 if (pcbddc->nedcG) { 1332 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1333 } 1334 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1335 if (Gins && GKins) { 1336 PetscScalar *data; 1337 const PetscInt *rows,*cols; 1338 PetscInt nrh,nch,nrc,ncc; 1339 1340 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1341 /* H1 */ 1342 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1343 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1344 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1345 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1346 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1347 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1348 /* complement */ 1349 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1350 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1351 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1352 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1353 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1354 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1355 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1356 1357 /* coarse discrete gradient */ 1358 if (pcbddc->nedcG) { 1359 PetscInt cols[2]; 1360 1361 cols[0] = 2*i; 1362 cols[1] = 2*i+1; 1363 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1364 } 1365 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1366 } 1367 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1368 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1369 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1370 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1371 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1372 } 1373 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1374 1375 /* Start assembling */ 1376 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1377 if (pcbddc->nedcG) { 1378 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1379 } 1380 1381 /* Free */ 1382 if (fl2g) { 1383 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1384 for (i=0;i<nee;i++) { 1385 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(eedges);CHKERRQ(ierr); 1388 } 1389 1390 /* hack mat_graph with primal dofs on the coarse edges */ 1391 { 1392 PCBDDCGraph graph = pcbddc->mat_graph; 1393 PetscInt *oqueue = graph->queue; 1394 PetscInt *ocptr = graph->cptr; 1395 PetscInt ncc,*idxs; 1396 1397 /* find first primal edge */ 1398 if (pcbddc->nedclocal) { 1399 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1400 } else { 1401 if (fl2g) { 1402 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1403 } 1404 idxs = cedges; 1405 } 1406 cum = 0; 1407 while (cum < nee && cedges[cum] < 0) cum++; 1408 1409 /* adapt connected components */ 1410 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1411 graph->cptr[0] = 0; 1412 for (i=0,ncc=0;i<graph->ncc;i++) { 1413 PetscInt lc = ocptr[i+1]-ocptr[i]; 1414 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1415 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1416 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1417 ncc++; 1418 lc--; 1419 cum++; 1420 while (cum < nee && cedges[cum] < 0) cum++; 1421 } 1422 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1423 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1424 ncc++; 1425 } 1426 graph->ncc = ncc; 1427 if (pcbddc->nedclocal) { 1428 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1429 } 1430 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1431 } 1432 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1433 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1434 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1435 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1436 1437 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1438 ierr = PetscFree(extrow);CHKERRQ(ierr); 1439 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1440 ierr = PetscFree(corners);CHKERRQ(ierr); 1441 ierr = PetscFree(cedges);CHKERRQ(ierr); 1442 ierr = PetscFree(extrows);CHKERRQ(ierr); 1443 ierr = PetscFree(extcols);CHKERRQ(ierr); 1444 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1445 1446 /* Complete assembling */ 1447 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1448 if (pcbddc->nedcG) { 1449 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1450 #if 0 1451 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1452 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1453 #endif 1454 } 1455 1456 /* set change of basis */ 1457 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1458 ierr = MatDestroy(&T);CHKERRQ(ierr); 1459 1460 PetscFunctionReturn(0); 1461 } 1462 1463 /* the near-null space of BDDC carries information on quadrature weights, 1464 and these can be collinear -> so cheat with MatNullSpaceCreate 1465 and create a suitable set of basis vectors first */ 1466 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1467 { 1468 PetscErrorCode ierr; 1469 PetscInt i; 1470 1471 PetscFunctionBegin; 1472 for (i=0;i<nvecs;i++) { 1473 PetscInt first,last; 1474 1475 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1476 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1477 if (i>=first && i < last) { 1478 PetscScalar *data; 1479 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1480 if (!has_const) { 1481 data[i-first] = 1.; 1482 } else { 1483 data[2*i-first] = 1./PetscSqrtReal(2.); 1484 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1485 } 1486 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1487 } 1488 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1489 } 1490 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1491 for (i=0;i<nvecs;i++) { /* reset vectors */ 1492 PetscInt first,last; 1493 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1494 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1495 if (i>=first && i < last) { 1496 PetscScalar *data; 1497 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1498 if (!has_const) { 1499 data[i-first] = 0.; 1500 } else { 1501 data[2*i-first] = 0.; 1502 data[2*i-first+1] = 0.; 1503 } 1504 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1505 } 1506 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1507 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1508 } 1509 PetscFunctionReturn(0); 1510 } 1511 1512 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1513 { 1514 Mat loc_divudotp; 1515 Vec p,v,vins,quad_vec,*quad_vecs; 1516 ISLocalToGlobalMapping map; 1517 PetscScalar *vals; 1518 const PetscScalar *array; 1519 PetscInt i,maxneighs,maxsize; 1520 PetscInt n_neigh,*neigh,*n_shared,**shared; 1521 PetscMPIInt rank; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1526 ierr = MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1527 if (!maxneighs) { 1528 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1529 *nnsp = NULL; 1530 PetscFunctionReturn(0); 1531 } 1532 maxsize = 0; 1533 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1534 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1535 /* create vectors to hold quadrature weights */ 1536 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1537 if (!transpose) { 1538 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1539 } else { 1540 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1541 } 1542 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1543 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1544 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1545 for (i=0;i<maxneighs;i++) { 1546 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1547 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1548 } 1549 1550 /* compute local quad vec */ 1551 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1552 if (!transpose) { 1553 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1554 } else { 1555 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1556 } 1557 ierr = VecSet(p,1.);CHKERRQ(ierr); 1558 if (!transpose) { 1559 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1560 } else { 1561 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1562 } 1563 if (vl2l) { 1564 Mat lA; 1565 VecScatter sc; 1566 1567 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1568 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1569 ierr = VecScatterCreate(v,vl2l,vins,NULL,&sc);CHKERRQ(ierr); 1570 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1571 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1572 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1573 } else { 1574 vins = v; 1575 } 1576 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1577 ierr = VecDestroy(&p);CHKERRQ(ierr); 1578 1579 /* insert in global quadrature vecs */ 1580 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1581 for (i=0;i<n_neigh;i++) { 1582 const PetscInt *idxs; 1583 PetscInt idx,nn,j; 1584 1585 idxs = shared[i]; 1586 nn = n_shared[i]; 1587 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1588 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1589 idx = -(idx+1); 1590 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1591 } 1592 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1593 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1594 if (vl2l) { 1595 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1596 } 1597 ierr = VecDestroy(&v);CHKERRQ(ierr); 1598 ierr = PetscFree(vals);CHKERRQ(ierr); 1599 1600 /* assemble near null space */ 1601 for (i=0;i<maxneighs;i++) { 1602 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1603 } 1604 for (i=0;i<maxneighs;i++) { 1605 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1606 ierr = VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");CHKERRQ(ierr); 1607 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1608 } 1609 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1610 PetscFunctionReturn(0); 1611 } 1612 1613 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1614 { 1615 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1616 PetscErrorCode ierr; 1617 1618 PetscFunctionBegin; 1619 if (primalv) { 1620 if (pcbddc->user_primal_vertices_local) { 1621 IS list[2], newp; 1622 1623 list[0] = primalv; 1624 list[1] = pcbddc->user_primal_vertices_local; 1625 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1626 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1627 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1628 pcbddc->user_primal_vertices_local = newp; 1629 } else { 1630 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1631 } 1632 } 1633 PetscFunctionReturn(0); 1634 } 1635 1636 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1637 { 1638 PetscInt f, *comp = (PetscInt *)ctx; 1639 1640 PetscFunctionBegin; 1641 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1642 PetscFunctionReturn(0); 1643 } 1644 1645 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1646 { 1647 PetscErrorCode ierr; 1648 Vec local,global; 1649 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1650 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1651 PetscBool monolithic = PETSC_FALSE; 1652 1653 PetscFunctionBegin; 1654 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1655 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1656 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1657 /* need to convert from global to local topology information and remove references to information in global ordering */ 1658 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1659 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1660 if (monolithic) { /* just get block size to properly compute vertices */ 1661 if (pcbddc->vertex_size == 1) { 1662 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1663 } 1664 goto boundary; 1665 } 1666 1667 if (pcbddc->user_provided_isfordofs) { 1668 if (pcbddc->n_ISForDofs) { 1669 PetscInt i; 1670 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1671 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1672 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1673 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1674 } 1675 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1676 pcbddc->n_ISForDofs = 0; 1677 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1678 } 1679 } else { 1680 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1681 DM dm; 1682 1683 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1684 if (!dm) { 1685 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1686 } 1687 if (dm) { 1688 IS *fields; 1689 PetscInt nf,i; 1690 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1691 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1692 for (i=0;i<nf;i++) { 1693 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1694 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1695 } 1696 ierr = PetscFree(fields);CHKERRQ(ierr); 1697 pcbddc->n_ISForDofsLocal = nf; 1698 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1699 PetscContainer c; 1700 1701 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1702 if (c) { 1703 MatISLocalFields lf; 1704 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1705 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1706 } else { /* fallback, create the default fields if bs > 1 */ 1707 PetscInt i, n = matis->A->rmap->n; 1708 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1709 if (i > 1) { 1710 pcbddc->n_ISForDofsLocal = i; 1711 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1712 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1713 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1714 } 1715 } 1716 } 1717 } 1718 } else { 1719 PetscInt i; 1720 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1721 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1722 } 1723 } 1724 } 1725 1726 boundary: 1727 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1728 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1729 } else if (pcbddc->DirichletBoundariesLocal) { 1730 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1731 } 1732 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1733 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1734 } else if (pcbddc->NeumannBoundariesLocal) { 1735 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1736 } 1737 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1738 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1739 } 1740 ierr = VecDestroy(&global);CHKERRQ(ierr); 1741 ierr = VecDestroy(&local);CHKERRQ(ierr); 1742 /* detect local disconnected subdomains if requested (use matis->A) */ 1743 if (pcbddc->detect_disconnected) { 1744 IS primalv = NULL; 1745 PetscInt i; 1746 1747 for (i=0;i<pcbddc->n_local_subs;i++) { 1748 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1749 } 1750 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1751 ierr = PCBDDCDetectDisconnectedComponents(pc,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1752 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1753 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1754 } 1755 /* early stage corner detection */ 1756 { 1757 DM dm; 1758 1759 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1760 if (dm) { 1761 PetscBool isda; 1762 1763 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1764 if (isda) { 1765 ISLocalToGlobalMapping l2l; 1766 IS corners; 1767 Mat lA; 1768 1769 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1770 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1771 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1772 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1773 if (l2l) { 1774 const PetscInt *idx; 1775 PetscInt bs,*idxout,n; 1776 1777 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1778 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1779 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1780 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1781 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1782 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1783 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1784 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1785 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1786 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1787 pcbddc->corner_selected = PETSC_TRUE; 1788 } else { /* not from DMDA */ 1789 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1790 } 1791 } 1792 } 1793 } 1794 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1795 DM dm; 1796 1797 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1798 if (!dm) { 1799 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1800 } 1801 if (dm) { 1802 Vec vcoords; 1803 PetscSection section; 1804 PetscReal *coords; 1805 PetscInt d,cdim,nl,nf,**ctxs; 1806 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1807 1808 ierr = DMGetCoordinateDim(dm,&cdim);CHKERRQ(ierr); 1809 ierr = DMGetDefaultSection(dm,§ion);CHKERRQ(ierr); 1810 ierr = PetscSectionGetNumFields(section,&nf);CHKERRQ(ierr); 1811 ierr = DMCreateGlobalVector(dm,&vcoords);CHKERRQ(ierr); 1812 ierr = VecGetLocalSize(vcoords,&nl);CHKERRQ(ierr); 1813 ierr = PetscMalloc1(nl*cdim,&coords);CHKERRQ(ierr); 1814 ierr = PetscMalloc2(nf,&funcs,nf,&ctxs);CHKERRQ(ierr); 1815 ierr = PetscMalloc1(nf,&ctxs[0]);CHKERRQ(ierr); 1816 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1817 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1818 for (d=0;d<cdim;d++) { 1819 PetscInt i; 1820 const PetscScalar *v; 1821 1822 for (i=0;i<nf;i++) ctxs[i][0] = d; 1823 ierr = DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);CHKERRQ(ierr); 1824 ierr = VecGetArrayRead(vcoords,&v);CHKERRQ(ierr); 1825 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1826 ierr = VecRestoreArrayRead(vcoords,&v);CHKERRQ(ierr); 1827 } 1828 ierr = VecDestroy(&vcoords);CHKERRQ(ierr); 1829 ierr = PCSetCoordinates(pc,cdim,nl,coords);CHKERRQ(ierr); 1830 ierr = PetscFree(coords);CHKERRQ(ierr); 1831 ierr = PetscFree(ctxs[0]);CHKERRQ(ierr); 1832 ierr = PetscFree2(funcs,ctxs);CHKERRQ(ierr); 1833 } 1834 } 1835 PetscFunctionReturn(0); 1836 } 1837 1838 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1839 { 1840 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1841 PetscErrorCode ierr; 1842 IS nis; 1843 const PetscInt *idxs; 1844 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1845 PetscBool *ld; 1846 1847 PetscFunctionBegin; 1848 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1849 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1850 if (mop == MPI_LAND) { 1851 /* init rootdata with true */ 1852 ld = (PetscBool*) matis->sf_rootdata; 1853 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1854 } else { 1855 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1856 } 1857 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1858 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1859 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1860 ld = (PetscBool*) matis->sf_leafdata; 1861 for (i=0;i<nd;i++) 1862 if (-1 < idxs[i] && idxs[i] < n) 1863 ld[idxs[i]] = PETSC_TRUE; 1864 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1865 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1866 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1867 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1868 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1869 if (mop == MPI_LAND) { 1870 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1871 } else { 1872 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1873 } 1874 for (i=0,nnd=0;i<n;i++) 1875 if (ld[i]) 1876 nidxs[nnd++] = i; 1877 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1878 ierr = ISDestroy(is);CHKERRQ(ierr); 1879 *is = nis; 1880 PetscFunctionReturn(0); 1881 } 1882 1883 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1884 { 1885 PC_IS *pcis = (PC_IS*)(pc->data); 1886 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1887 PetscErrorCode ierr; 1888 1889 PetscFunctionBegin; 1890 if (!pcbddc->benign_have_null) { 1891 PetscFunctionReturn(0); 1892 } 1893 if (pcbddc->ChangeOfBasisMatrix) { 1894 Vec swap; 1895 1896 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1897 swap = pcbddc->work_change; 1898 pcbddc->work_change = r; 1899 r = swap; 1900 } 1901 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1902 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1903 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1904 ierr = VecSet(z,0.);CHKERRQ(ierr); 1905 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1906 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1907 if (pcbddc->ChangeOfBasisMatrix) { 1908 pcbddc->work_change = r; 1909 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1910 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1911 } 1912 PetscFunctionReturn(0); 1913 } 1914 1915 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1916 { 1917 PCBDDCBenignMatMult_ctx ctx; 1918 PetscErrorCode ierr; 1919 PetscBool apply_right,apply_left,reset_x; 1920 1921 PetscFunctionBegin; 1922 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1923 if (transpose) { 1924 apply_right = ctx->apply_left; 1925 apply_left = ctx->apply_right; 1926 } else { 1927 apply_right = ctx->apply_right; 1928 apply_left = ctx->apply_left; 1929 } 1930 reset_x = PETSC_FALSE; 1931 if (apply_right) { 1932 const PetscScalar *ax; 1933 PetscInt nl,i; 1934 1935 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1936 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1937 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1938 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1939 for (i=0;i<ctx->benign_n;i++) { 1940 PetscScalar sum,val; 1941 const PetscInt *idxs; 1942 PetscInt nz,j; 1943 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1944 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1945 sum = 0.; 1946 if (ctx->apply_p0) { 1947 val = ctx->work[idxs[nz-1]]; 1948 for (j=0;j<nz-1;j++) { 1949 sum += ctx->work[idxs[j]]; 1950 ctx->work[idxs[j]] += val; 1951 } 1952 } else { 1953 for (j=0;j<nz-1;j++) { 1954 sum += ctx->work[idxs[j]]; 1955 } 1956 } 1957 ctx->work[idxs[nz-1]] -= sum; 1958 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1959 } 1960 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1961 reset_x = PETSC_TRUE; 1962 } 1963 if (transpose) { 1964 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1965 } else { 1966 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1967 } 1968 if (reset_x) { 1969 ierr = VecResetArray(x);CHKERRQ(ierr); 1970 } 1971 if (apply_left) { 1972 PetscScalar *ay; 1973 PetscInt i; 1974 1975 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1976 for (i=0;i<ctx->benign_n;i++) { 1977 PetscScalar sum,val; 1978 const PetscInt *idxs; 1979 PetscInt nz,j; 1980 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1981 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1982 val = -ay[idxs[nz-1]]; 1983 if (ctx->apply_p0) { 1984 sum = 0.; 1985 for (j=0;j<nz-1;j++) { 1986 sum += ay[idxs[j]]; 1987 ay[idxs[j]] += val; 1988 } 1989 ay[idxs[nz-1]] += sum; 1990 } else { 1991 for (j=0;j<nz-1;j++) { 1992 ay[idxs[j]] += val; 1993 } 1994 ay[idxs[nz-1]] = 0.; 1995 } 1996 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1997 } 1998 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1999 } 2000 PetscFunctionReturn(0); 2001 } 2002 2003 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2004 { 2005 PetscErrorCode ierr; 2006 2007 PetscFunctionBegin; 2008 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 2009 PetscFunctionReturn(0); 2010 } 2011 2012 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2013 { 2014 PetscErrorCode ierr; 2015 2016 PetscFunctionBegin; 2017 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2022 { 2023 PC_IS *pcis = (PC_IS*)pc->data; 2024 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2025 PCBDDCBenignMatMult_ctx ctx; 2026 PetscErrorCode ierr; 2027 2028 PetscFunctionBegin; 2029 if (!restore) { 2030 Mat A_IB,A_BI; 2031 PetscScalar *work; 2032 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2033 2034 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2035 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2036 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2037 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2038 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2039 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2040 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2041 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2042 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2043 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2044 ctx->apply_left = PETSC_TRUE; 2045 ctx->apply_right = PETSC_FALSE; 2046 ctx->apply_p0 = PETSC_FALSE; 2047 ctx->benign_n = pcbddc->benign_n; 2048 if (reuse) { 2049 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2050 ctx->free = PETSC_FALSE; 2051 } else { /* TODO: could be optimized for successive solves */ 2052 ISLocalToGlobalMapping N_to_D; 2053 PetscInt i; 2054 2055 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2056 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2057 for (i=0;i<pcbddc->benign_n;i++) { 2058 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2059 } 2060 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2061 ctx->free = PETSC_TRUE; 2062 } 2063 ctx->A = pcis->A_IB; 2064 ctx->work = work; 2065 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2066 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2067 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2068 pcis->A_IB = A_IB; 2069 2070 /* A_BI as A_IB^T */ 2071 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2072 pcbddc->benign_original_mat = pcis->A_BI; 2073 pcis->A_BI = A_BI; 2074 } else { 2075 if (!pcbddc->benign_original_mat) { 2076 PetscFunctionReturn(0); 2077 } 2078 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2079 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2080 pcis->A_IB = ctx->A; 2081 ctx->A = NULL; 2082 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2083 pcis->A_BI = pcbddc->benign_original_mat; 2084 pcbddc->benign_original_mat = NULL; 2085 if (ctx->free) { 2086 PetscInt i; 2087 for (i=0;i<ctx->benign_n;i++) { 2088 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2089 } 2090 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2091 } 2092 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2093 ierr = PetscFree(ctx);CHKERRQ(ierr); 2094 } 2095 PetscFunctionReturn(0); 2096 } 2097 2098 /* used just in bddc debug mode */ 2099 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2100 { 2101 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2102 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2103 Mat An; 2104 PetscErrorCode ierr; 2105 2106 PetscFunctionBegin; 2107 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2108 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2109 if (is1) { 2110 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2111 ierr = MatDestroy(&An);CHKERRQ(ierr); 2112 } else { 2113 *B = An; 2114 } 2115 PetscFunctionReturn(0); 2116 } 2117 2118 /* TODO: add reuse flag */ 2119 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2120 { 2121 Mat Bt; 2122 PetscScalar *a,*bdata; 2123 const PetscInt *ii,*ij; 2124 PetscInt m,n,i,nnz,*bii,*bij; 2125 PetscBool flg_row; 2126 PetscErrorCode ierr; 2127 2128 PetscFunctionBegin; 2129 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2130 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2131 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2132 nnz = n; 2133 for (i=0;i<ii[n];i++) { 2134 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2135 } 2136 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2137 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2138 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2139 nnz = 0; 2140 bii[0] = 0; 2141 for (i=0;i<n;i++) { 2142 PetscInt j; 2143 for (j=ii[i];j<ii[i+1];j++) { 2144 PetscScalar entry = a[j]; 2145 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2146 bij[nnz] = ij[j]; 2147 bdata[nnz] = entry; 2148 nnz++; 2149 } 2150 } 2151 bii[i+1] = nnz; 2152 } 2153 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2154 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2155 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2156 { 2157 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2158 b->free_a = PETSC_TRUE; 2159 b->free_ij = PETSC_TRUE; 2160 } 2161 if (*B == A) { 2162 ierr = MatDestroy(&A);CHKERRQ(ierr); 2163 } 2164 *B = Bt; 2165 PetscFunctionReturn(0); 2166 } 2167 2168 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv) 2169 { 2170 Mat B = NULL; 2171 DM dm; 2172 IS is_dummy,*cc_n; 2173 ISLocalToGlobalMapping l2gmap_dummy; 2174 PCBDDCGraph graph; 2175 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2176 PetscInt i,n; 2177 PetscInt *xadj,*adjncy; 2178 PetscBool isplex = PETSC_FALSE; 2179 PetscErrorCode ierr; 2180 2181 PetscFunctionBegin; 2182 if (ncc) *ncc = 0; 2183 if (cc) *cc = NULL; 2184 if (primalv) *primalv = NULL; 2185 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2186 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2187 if (!dm) { 2188 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2189 } 2190 if (dm) { 2191 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2192 } 2193 if (isplex) { /* this code has been modified from plexpartition.c */ 2194 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2195 PetscInt *adj = NULL; 2196 IS cellNumbering; 2197 const PetscInt *cellNum; 2198 PetscBool useCone, useClosure; 2199 PetscSection section; 2200 PetscSegBuffer adjBuffer; 2201 PetscSF sfPoint; 2202 PetscErrorCode ierr; 2203 2204 PetscFunctionBegin; 2205 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2206 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2207 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2208 /* Build adjacency graph via a section/segbuffer */ 2209 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2210 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2211 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2212 /* Always use FVM adjacency to create partitioner graph */ 2213 ierr = DMPlexGetAdjacencyUseCone(dm, &useCone);CHKERRQ(ierr); 2214 ierr = DMPlexGetAdjacencyUseClosure(dm, &useClosure);CHKERRQ(ierr); 2215 ierr = DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);CHKERRQ(ierr); 2216 ierr = DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);CHKERRQ(ierr); 2217 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2218 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2219 for (n = 0, p = pStart; p < pEnd; p++) { 2220 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2221 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2222 adjSize = PETSC_DETERMINE; 2223 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2224 for (a = 0; a < adjSize; ++a) { 2225 const PetscInt point = adj[a]; 2226 if (pStart <= point && point < pEnd) { 2227 PetscInt *PETSC_RESTRICT pBuf; 2228 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2229 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2230 *pBuf = point; 2231 } 2232 } 2233 n++; 2234 } 2235 ierr = DMPlexSetAdjacencyUseCone(dm, useCone);CHKERRQ(ierr); 2236 ierr = DMPlexSetAdjacencyUseClosure(dm, useClosure);CHKERRQ(ierr); 2237 /* Derive CSR graph from section/segbuffer */ 2238 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2239 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2240 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2241 for (idx = 0, p = pStart; p < pEnd; p++) { 2242 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2243 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2244 } 2245 xadj[n] = size; 2246 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2247 /* Clean up */ 2248 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2249 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2250 ierr = PetscFree(adj);CHKERRQ(ierr); 2251 graph->xadj = xadj; 2252 graph->adjncy = adjncy; 2253 } else { 2254 Mat A; 2255 PetscBool filter = PETSC_FALSE, isseqaij, flg_row; 2256 2257 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2258 if (!A->rmap->N || !A->cmap->N) { 2259 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2260 PetscFunctionReturn(0); 2261 } 2262 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2263 if (!isseqaij && filter) { 2264 PetscBool isseqdense; 2265 2266 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2267 if (!isseqdense) { 2268 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2269 } else { /* TODO: rectangular case and LDA */ 2270 PetscScalar *array; 2271 PetscReal chop=1.e-6; 2272 2273 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2274 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2275 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2276 for (i=0;i<n;i++) { 2277 PetscInt j; 2278 for (j=i+1;j<n;j++) { 2279 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2280 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2281 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2282 } 2283 } 2284 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2285 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2286 } 2287 } else { 2288 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2289 B = A; 2290 } 2291 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2292 2293 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2294 if (filter) { 2295 PetscScalar *data; 2296 PetscInt j,cum; 2297 2298 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2299 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2300 cum = 0; 2301 for (i=0;i<n;i++) { 2302 PetscInt t; 2303 2304 for (j=xadj[i];j<xadj[i+1];j++) { 2305 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2306 continue; 2307 } 2308 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2309 } 2310 t = xadj_filtered[i]; 2311 xadj_filtered[i] = cum; 2312 cum += t; 2313 } 2314 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2315 graph->xadj = xadj_filtered; 2316 graph->adjncy = adjncy_filtered; 2317 } else { 2318 graph->xadj = xadj; 2319 graph->adjncy = adjncy; 2320 } 2321 } 2322 /* compute local connected components using PCBDDCGraph */ 2323 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2324 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2325 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2326 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2327 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2328 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2329 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2330 2331 /* partial clean up */ 2332 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2333 if (B) { 2334 PetscBool flg_row; 2335 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2336 ierr = MatDestroy(&B);CHKERRQ(ierr); 2337 } 2338 if (isplex) { 2339 ierr = PetscFree(xadj);CHKERRQ(ierr); 2340 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2341 } 2342 2343 /* get back data */ 2344 if (isplex) { 2345 if (ncc) *ncc = graph->ncc; 2346 if (cc || primalv) { 2347 Mat A; 2348 PetscBT btv,btvt; 2349 PetscSection subSection; 2350 PetscInt *ids,cum,cump,*cids,*pids; 2351 2352 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2353 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2354 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2355 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2356 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2357 2358 cids[0] = 0; 2359 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2360 PetscInt j; 2361 2362 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2363 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2364 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2365 2366 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2367 for (k = 0; k < 2*size; k += 2) { 2368 PetscInt s, p = closure[k], off, dof, cdof; 2369 2370 ierr = PetscSectionGetConstraintDof(subSection, p, &cdof);CHKERRQ(ierr); 2371 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2372 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2373 for (s = 0; s < dof-cdof; s++) { 2374 if (PetscBTLookupSet(btvt,off+s)) continue; 2375 if (!PetscBTLookup(btv,off+s)) { 2376 ids[cum++] = off+s; 2377 } else { /* cross-vertex */ 2378 pids[cump++] = off+s; 2379 } 2380 } 2381 } 2382 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2383 } 2384 cids[i+1] = cum; 2385 /* mark dofs as already assigned */ 2386 for (j = cids[i]; j < cids[i+1]; j++) { 2387 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2388 } 2389 } 2390 if (cc) { 2391 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2392 for (i = 0; i < graph->ncc; i++) { 2393 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2394 } 2395 *cc = cc_n; 2396 } 2397 if (primalv) { 2398 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2399 } 2400 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2401 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2402 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2403 } 2404 } else { 2405 if (ncc) *ncc = graph->ncc; 2406 if (cc) { 2407 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2408 for (i=0;i<graph->ncc;i++) { 2409 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2410 } 2411 *cc = cc_n; 2412 } 2413 } 2414 /* clean up graph */ 2415 graph->xadj = 0; 2416 graph->adjncy = 0; 2417 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2418 PetscFunctionReturn(0); 2419 } 2420 2421 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2422 { 2423 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2424 PC_IS* pcis = (PC_IS*)(pc->data); 2425 IS dirIS = NULL; 2426 PetscInt i; 2427 PetscErrorCode ierr; 2428 2429 PetscFunctionBegin; 2430 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2431 if (zerodiag) { 2432 Mat A; 2433 Vec vec3_N; 2434 PetscScalar *vals; 2435 const PetscInt *idxs; 2436 PetscInt nz,*count; 2437 2438 /* p0 */ 2439 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2440 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2441 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2442 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2443 for (i=0;i<nz;i++) vals[i] = 1.; 2444 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2445 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2446 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2447 /* v_I */ 2448 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2449 for (i=0;i<nz;i++) vals[i] = 0.; 2450 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2451 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2452 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2453 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2454 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2455 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2456 if (dirIS) { 2457 PetscInt n; 2458 2459 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2460 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2461 for (i=0;i<n;i++) vals[i] = 0.; 2462 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2463 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2464 } 2465 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2466 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2467 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2468 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2469 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2470 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2471 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2472 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2473 ierr = PetscFree(vals);CHKERRQ(ierr); 2474 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2475 2476 /* there should not be any pressure dofs lying on the interface */ 2477 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2478 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2479 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2480 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2481 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2482 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2483 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2484 ierr = PetscFree(count);CHKERRQ(ierr); 2485 } 2486 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2487 2488 /* check PCBDDCBenignGetOrSetP0 */ 2489 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2490 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2491 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2492 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2493 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2494 for (i=0;i<pcbddc->benign_n;i++) { 2495 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2496 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2497 } 2498 PetscFunctionReturn(0); 2499 } 2500 2501 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2502 { 2503 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2504 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2505 PetscInt nz,n; 2506 PetscInt *interior_dofs,n_interior_dofs,nneu; 2507 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2508 PetscErrorCode ierr; 2509 2510 PetscFunctionBegin; 2511 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2512 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2513 for (n=0;n<pcbddc->benign_n;n++) { 2514 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2515 } 2516 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2517 pcbddc->benign_n = 0; 2518 2519 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2520 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2521 Checks if all the pressure dofs in each subdomain have a zero diagonal 2522 If not, a change of basis on pressures is not needed 2523 since the local Schur complements are already SPD 2524 */ 2525 has_null_pressures = PETSC_TRUE; 2526 have_null = PETSC_TRUE; 2527 if (pcbddc->n_ISForDofsLocal) { 2528 IS iP = NULL; 2529 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2530 2531 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2532 ierr = PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2533 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2534 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2535 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2536 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2537 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2538 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2539 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2540 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2541 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2542 if (iP) { 2543 IS newpressures; 2544 2545 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2546 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2547 pressures = newpressures; 2548 } 2549 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2550 if (!sorted) { 2551 ierr = ISSort(pressures);CHKERRQ(ierr); 2552 } 2553 } else { 2554 pressures = NULL; 2555 } 2556 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2557 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2558 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2559 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2560 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2561 if (!sorted) { 2562 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2563 } 2564 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2565 zerodiag_save = zerodiag; 2566 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2567 if (!nz) { 2568 if (n) have_null = PETSC_FALSE; 2569 has_null_pressures = PETSC_FALSE; 2570 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2571 } 2572 recompute_zerodiag = PETSC_FALSE; 2573 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2574 zerodiag_subs = NULL; 2575 pcbddc->benign_n = 0; 2576 n_interior_dofs = 0; 2577 interior_dofs = NULL; 2578 nneu = 0; 2579 if (pcbddc->NeumannBoundariesLocal) { 2580 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2581 } 2582 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2583 if (checkb) { /* need to compute interior nodes */ 2584 PetscInt n,i,j; 2585 PetscInt n_neigh,*neigh,*n_shared,**shared; 2586 PetscInt *iwork; 2587 2588 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2589 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2590 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2591 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2592 for (i=1;i<n_neigh;i++) 2593 for (j=0;j<n_shared[i];j++) 2594 iwork[shared[i][j]] += 1; 2595 for (i=0;i<n;i++) 2596 if (!iwork[i]) 2597 interior_dofs[n_interior_dofs++] = i; 2598 ierr = PetscFree(iwork);CHKERRQ(ierr); 2599 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2600 } 2601 if (has_null_pressures) { 2602 IS *subs; 2603 PetscInt nsubs,i,j,nl; 2604 const PetscInt *idxs; 2605 PetscScalar *array; 2606 Vec *work; 2607 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2608 2609 subs = pcbddc->local_subs; 2610 nsubs = pcbddc->n_local_subs; 2611 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2612 if (checkb) { 2613 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2614 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2615 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2616 /* work[0] = 1_p */ 2617 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2618 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2619 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2620 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2621 /* work[0] = 1_v */ 2622 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2623 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2624 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2625 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2626 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2627 } 2628 if (nsubs > 1) { 2629 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2630 for (i=0;i<nsubs;i++) { 2631 ISLocalToGlobalMapping l2g; 2632 IS t_zerodiag_subs; 2633 PetscInt nl; 2634 2635 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2636 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2637 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2638 if (nl) { 2639 PetscBool valid = PETSC_TRUE; 2640 2641 if (checkb) { 2642 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2643 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2644 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2645 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2646 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2647 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2648 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2649 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2650 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2651 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2652 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2653 for (j=0;j<n_interior_dofs;j++) { 2654 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2655 valid = PETSC_FALSE; 2656 break; 2657 } 2658 } 2659 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2660 } 2661 if (valid && nneu) { 2662 const PetscInt *idxs; 2663 PetscInt nzb; 2664 2665 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2666 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2667 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2668 if (nzb) valid = PETSC_FALSE; 2669 } 2670 if (valid && pressures) { 2671 IS t_pressure_subs; 2672 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2673 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2674 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2675 } 2676 if (valid) { 2677 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2678 pcbddc->benign_n++; 2679 } else { 2680 recompute_zerodiag = PETSC_TRUE; 2681 } 2682 } 2683 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2684 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2685 } 2686 } else { /* there's just one subdomain (or zero if they have not been detected */ 2687 PetscBool valid = PETSC_TRUE; 2688 2689 if (nneu) valid = PETSC_FALSE; 2690 if (valid && pressures) { 2691 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2692 } 2693 if (valid && checkb) { 2694 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2695 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2696 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2697 for (j=0;j<n_interior_dofs;j++) { 2698 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2699 valid = PETSC_FALSE; 2700 break; 2701 } 2702 } 2703 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2704 } 2705 if (valid) { 2706 pcbddc->benign_n = 1; 2707 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2708 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2709 zerodiag_subs[0] = zerodiag; 2710 } 2711 } 2712 if (checkb) { 2713 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2714 } 2715 } 2716 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2717 2718 if (!pcbddc->benign_n) { 2719 PetscInt n; 2720 2721 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2722 recompute_zerodiag = PETSC_FALSE; 2723 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2724 if (n) { 2725 has_null_pressures = PETSC_FALSE; 2726 have_null = PETSC_FALSE; 2727 } 2728 } 2729 2730 /* final check for null pressures */ 2731 if (zerodiag && pressures) { 2732 PetscInt nz,np; 2733 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2734 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2735 if (nz != np) have_null = PETSC_FALSE; 2736 } 2737 2738 if (recompute_zerodiag) { 2739 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2740 if (pcbddc->benign_n == 1) { 2741 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2742 zerodiag = zerodiag_subs[0]; 2743 } else { 2744 PetscInt i,nzn,*new_idxs; 2745 2746 nzn = 0; 2747 for (i=0;i<pcbddc->benign_n;i++) { 2748 PetscInt ns; 2749 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2750 nzn += ns; 2751 } 2752 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2753 nzn = 0; 2754 for (i=0;i<pcbddc->benign_n;i++) { 2755 PetscInt ns,*idxs; 2756 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2757 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2758 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2759 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2760 nzn += ns; 2761 } 2762 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2763 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2764 } 2765 have_null = PETSC_FALSE; 2766 } 2767 2768 /* Prepare matrix to compute no-net-flux */ 2769 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2770 Mat A,loc_divudotp; 2771 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2772 IS row,col,isused = NULL; 2773 PetscInt M,N,n,st,n_isused; 2774 2775 if (pressures) { 2776 isused = pressures; 2777 } else { 2778 isused = zerodiag_save; 2779 } 2780 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2781 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2782 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2783 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2784 n_isused = 0; 2785 if (isused) { 2786 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2787 } 2788 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2789 st = st-n_isused; 2790 if (n) { 2791 const PetscInt *gidxs; 2792 2793 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2794 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2795 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2796 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2797 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2798 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2799 } else { 2800 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2801 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2802 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2803 } 2804 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2805 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2806 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2807 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2808 ierr = ISDestroy(&row);CHKERRQ(ierr); 2809 ierr = ISDestroy(&col);CHKERRQ(ierr); 2810 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2811 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2812 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2813 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2814 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2815 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2816 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2817 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2818 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2819 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2820 } 2821 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2822 2823 /* change of basis and p0 dofs */ 2824 if (has_null_pressures) { 2825 IS zerodiagc; 2826 const PetscInt *idxs,*idxsc; 2827 PetscInt i,s,*nnz; 2828 2829 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2830 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2831 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2832 /* local change of basis for pressures */ 2833 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2834 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2835 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2836 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2837 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2838 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2839 for (i=0;i<pcbddc->benign_n;i++) { 2840 PetscInt nzs,j; 2841 2842 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2843 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2844 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2845 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2846 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2847 } 2848 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2849 ierr = PetscFree(nnz);CHKERRQ(ierr); 2850 /* set identity on velocities */ 2851 for (i=0;i<n-nz;i++) { 2852 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2853 } 2854 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2855 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2856 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2857 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2858 /* set change on pressures */ 2859 for (s=0;s<pcbddc->benign_n;s++) { 2860 PetscScalar *array; 2861 PetscInt nzs; 2862 2863 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2864 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2865 for (i=0;i<nzs-1;i++) { 2866 PetscScalar vals[2]; 2867 PetscInt cols[2]; 2868 2869 cols[0] = idxs[i]; 2870 cols[1] = idxs[nzs-1]; 2871 vals[0] = 1.; 2872 vals[1] = 1.; 2873 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2874 } 2875 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2876 for (i=0;i<nzs-1;i++) array[i] = -1.; 2877 array[nzs-1] = 1.; 2878 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2879 /* store local idxs for p0 */ 2880 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2881 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2882 ierr = PetscFree(array);CHKERRQ(ierr); 2883 } 2884 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2885 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2886 /* project if needed */ 2887 if (pcbddc->benign_change_explicit) { 2888 Mat M; 2889 2890 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2891 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2892 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2893 ierr = MatDestroy(&M);CHKERRQ(ierr); 2894 } 2895 /* store global idxs for p0 */ 2896 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2897 } 2898 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2899 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2900 2901 /* determines if the coarse solver will be singular or not */ 2902 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2903 /* determines if the problem has subdomains with 0 pressure block */ 2904 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2905 *zerodiaglocal = zerodiag; 2906 PetscFunctionReturn(0); 2907 } 2908 2909 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2910 { 2911 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2912 PetscScalar *array; 2913 PetscErrorCode ierr; 2914 2915 PetscFunctionBegin; 2916 if (!pcbddc->benign_sf) { 2917 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2918 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2919 } 2920 if (get) { 2921 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2922 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2923 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2924 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2925 } else { 2926 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2927 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2928 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2929 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2930 } 2931 PetscFunctionReturn(0); 2932 } 2933 2934 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2935 { 2936 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2937 PetscErrorCode ierr; 2938 2939 PetscFunctionBegin; 2940 /* TODO: add error checking 2941 - avoid nested pop (or push) calls. 2942 - cannot push before pop. 2943 - cannot call this if pcbddc->local_mat is NULL 2944 */ 2945 if (!pcbddc->benign_n) { 2946 PetscFunctionReturn(0); 2947 } 2948 if (pop) { 2949 if (pcbddc->benign_change_explicit) { 2950 IS is_p0; 2951 MatReuse reuse; 2952 2953 /* extract B_0 */ 2954 reuse = MAT_INITIAL_MATRIX; 2955 if (pcbddc->benign_B0) { 2956 reuse = MAT_REUSE_MATRIX; 2957 } 2958 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2959 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2960 /* remove rows and cols from local problem */ 2961 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2962 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2963 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2964 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2965 } else { 2966 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2967 PetscScalar *vals; 2968 PetscInt i,n,*idxs_ins; 2969 2970 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2971 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2972 if (!pcbddc->benign_B0) { 2973 PetscInt *nnz; 2974 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2975 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2976 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2977 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2978 for (i=0;i<pcbddc->benign_n;i++) { 2979 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2980 nnz[i] = n - nnz[i]; 2981 } 2982 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2983 ierr = PetscFree(nnz);CHKERRQ(ierr); 2984 } 2985 2986 for (i=0;i<pcbddc->benign_n;i++) { 2987 PetscScalar *array; 2988 PetscInt *idxs,j,nz,cum; 2989 2990 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2991 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2992 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2993 for (j=0;j<nz;j++) vals[j] = 1.; 2994 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2995 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2996 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2997 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2998 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2999 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 3000 cum = 0; 3001 for (j=0;j<n;j++) { 3002 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3003 vals[cum] = array[j]; 3004 idxs_ins[cum] = j; 3005 cum++; 3006 } 3007 } 3008 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 3009 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 3010 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3011 } 3012 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3013 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3014 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 3015 } 3016 } else { /* push */ 3017 if (pcbddc->benign_change_explicit) { 3018 PetscInt i; 3019 3020 for (i=0;i<pcbddc->benign_n;i++) { 3021 PetscScalar *B0_vals; 3022 PetscInt *B0_cols,B0_ncol; 3023 3024 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3025 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3026 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3027 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 3028 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3029 } 3030 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3031 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3032 } else { 3033 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 3034 } 3035 } 3036 PetscFunctionReturn(0); 3037 } 3038 3039 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3040 { 3041 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3042 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3043 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3044 PetscBLASInt *B_iwork,*B_ifail; 3045 PetscScalar *work,lwork; 3046 PetscScalar *St,*S,*eigv; 3047 PetscScalar *Sarray,*Starray; 3048 PetscReal *eigs,thresh,lthresh,uthresh; 3049 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3050 PetscBool allocated_S_St; 3051 #if defined(PETSC_USE_COMPLEX) 3052 PetscReal *rwork; 3053 #endif 3054 PetscErrorCode ierr; 3055 3056 PetscFunctionBegin; 3057 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3058 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3059 if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3060 3061 if (pcbddc->dbg_flag) { 3062 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3063 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3064 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3065 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3066 } 3067 3068 if (pcbddc->dbg_flag) { 3069 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 3070 } 3071 3072 /* max size of subsets */ 3073 mss = 0; 3074 for (i=0;i<sub_schurs->n_subs;i++) { 3075 PetscInt subset_size; 3076 3077 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3078 mss = PetscMax(mss,subset_size); 3079 } 3080 3081 /* min/max and threshold */ 3082 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3083 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3084 nmax = PetscMax(nmin,nmax); 3085 allocated_S_St = PETSC_FALSE; 3086 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3087 allocated_S_St = PETSC_TRUE; 3088 } 3089 3090 /* allocate lapack workspace */ 3091 cum = cum2 = 0; 3092 maxneigs = 0; 3093 for (i=0;i<sub_schurs->n_subs;i++) { 3094 PetscInt n,subset_size; 3095 3096 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3097 n = PetscMin(subset_size,nmax); 3098 cum += subset_size; 3099 cum2 += subset_size*n; 3100 maxneigs = PetscMax(maxneigs,n); 3101 } 3102 if (mss) { 3103 if (sub_schurs->is_symmetric) { 3104 PetscBLASInt B_itype = 1; 3105 PetscBLASInt B_N = mss; 3106 PetscReal zero = 0.0; 3107 PetscReal eps = 0.0; /* dlamch? */ 3108 3109 B_lwork = -1; 3110 S = NULL; 3111 St = NULL; 3112 eigs = NULL; 3113 eigv = NULL; 3114 B_iwork = NULL; 3115 B_ifail = NULL; 3116 #if defined(PETSC_USE_COMPLEX) 3117 rwork = NULL; 3118 #endif 3119 thresh = 1.0; 3120 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3121 #if defined(PETSC_USE_COMPLEX) 3122 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3123 #else 3124 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3125 #endif 3126 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3127 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3128 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3129 } else { 3130 lwork = 0; 3131 } 3132 3133 nv = 0; 3134 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3135 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3136 } 3137 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3138 if (allocated_S_St) { 3139 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3140 } 3141 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3142 #if defined(PETSC_USE_COMPLEX) 3143 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3144 #endif 3145 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3146 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3147 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3148 nv+cum,&pcbddc->adaptive_constraints_idxs, 3149 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3150 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 3151 3152 maxneigs = 0; 3153 cum = cumarray = 0; 3154 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3155 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3156 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3157 const PetscInt *idxs; 3158 3159 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3160 for (cum=0;cum<nv;cum++) { 3161 pcbddc->adaptive_constraints_n[cum] = 1; 3162 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3163 pcbddc->adaptive_constraints_data[cum] = 1.0; 3164 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3165 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3166 } 3167 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3168 } 3169 3170 if (mss) { /* multilevel */ 3171 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3172 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3173 } 3174 3175 lthresh = pcbddc->adaptive_threshold[0]; 3176 uthresh = pcbddc->adaptive_threshold[1]; 3177 for (i=0;i<sub_schurs->n_subs;i++) { 3178 const PetscInt *idxs; 3179 PetscReal upper,lower; 3180 PetscInt j,subset_size,eigs_start = 0; 3181 PetscBLASInt B_N; 3182 PetscBool same_data = PETSC_FALSE; 3183 PetscBool scal = PETSC_FALSE; 3184 3185 if (pcbddc->use_deluxe_scaling) { 3186 upper = PETSC_MAX_REAL; 3187 lower = uthresh; 3188 } else { 3189 if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3190 upper = 1./uthresh; 3191 lower = 0.; 3192 } 3193 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3194 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3195 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3196 /* this is experimental: we assume the dofs have been properly grouped to have 3197 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3198 if (!sub_schurs->is_posdef) { 3199 Mat T; 3200 3201 for (j=0;j<subset_size;j++) { 3202 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3203 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);CHKERRQ(ierr); 3204 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3205 ierr = MatDestroy(&T);CHKERRQ(ierr); 3206 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);CHKERRQ(ierr); 3207 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3208 ierr = MatDestroy(&T);CHKERRQ(ierr); 3209 if (sub_schurs->change_primal_sub) { 3210 PetscInt nz,k; 3211 const PetscInt *idxs; 3212 3213 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);CHKERRQ(ierr); 3214 ierr = ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3215 for (k=0;k<nz;k++) { 3216 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3217 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3218 } 3219 ierr = ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3220 } 3221 scal = PETSC_TRUE; 3222 break; 3223 } 3224 } 3225 } 3226 3227 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3228 if (sub_schurs->is_symmetric) { 3229 PetscInt j,k; 3230 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 3231 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3232 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3233 } 3234 for (j=0;j<subset_size;j++) { 3235 for (k=j;k<subset_size;k++) { 3236 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3237 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3238 } 3239 } 3240 } else { 3241 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3242 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3243 } 3244 } else { 3245 S = Sarray + cumarray; 3246 St = Starray + cumarray; 3247 } 3248 /* see if we can save some work */ 3249 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3250 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 3251 } 3252 3253 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3254 B_neigs = 0; 3255 } else { 3256 if (sub_schurs->is_symmetric) { 3257 PetscBLASInt B_itype = 1; 3258 PetscBLASInt B_IL, B_IU; 3259 PetscReal eps = -1.0; /* dlamch? */ 3260 PetscInt nmin_s; 3261 PetscBool compute_range; 3262 3263 B_neigs = 0; 3264 compute_range = (PetscBool)!same_data; 3265 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3266 3267 if (pcbddc->dbg_flag) { 3268 PetscInt nc = 0; 3269 3270 if (sub_schurs->change_primal_sub) { 3271 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);CHKERRQ(ierr); 3272 } 3273 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d (range %d) (change %d).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);CHKERRQ(ierr); 3274 } 3275 3276 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3277 if (compute_range) { 3278 3279 /* ask for eigenvalues larger than thresh */ 3280 if (sub_schurs->is_posdef) { 3281 #if defined(PETSC_USE_COMPLEX) 3282 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3283 #else 3284 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3285 #endif 3286 } else { /* no theory so far, but it works nicely */ 3287 PetscInt recipe = 0,recipe_m = 1; 3288 PetscReal bb[2]; 3289 3290 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);CHKERRQ(ierr); 3291 switch (recipe) { 3292 case 0: 3293 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3294 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3295 #if defined(PETSC_USE_COMPLEX) 3296 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3297 #else 3298 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3299 #endif 3300 break; 3301 case 1: 3302 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3303 #if defined(PETSC_USE_COMPLEX) 3304 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3305 #else 3306 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3307 #endif 3308 if (!scal) { 3309 PetscBLASInt B_neigs2 = 0; 3310 3311 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3312 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3313 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3314 #if defined(PETSC_USE_COMPLEX) 3315 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3316 #else 3317 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3318 #endif 3319 B_neigs += B_neigs2; 3320 } 3321 break; 3322 case 2: 3323 if (scal) { 3324 bb[0] = PETSC_MIN_REAL; 3325 bb[1] = 0; 3326 #if defined(PETSC_USE_COMPLEX) 3327 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3328 #else 3329 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3330 #endif 3331 } else { 3332 PetscBLASInt B_neigs2 = 0; 3333 PetscBool import = PETSC_FALSE; 3334 3335 lthresh = PetscMax(lthresh,0.0); 3336 if (lthresh > 0.0) { 3337 bb[0] = PETSC_MIN_REAL; 3338 bb[1] = lthresh*lthresh; 3339 3340 import = PETSC_TRUE; 3341 #if defined(PETSC_USE_COMPLEX) 3342 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3343 #else 3344 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3345 #endif 3346 } 3347 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3348 bb[1] = PETSC_MAX_REAL; 3349 if (import) { 3350 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3351 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3352 } 3353 #if defined(PETSC_USE_COMPLEX) 3354 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3355 #else 3356 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3357 #endif 3358 B_neigs += B_neigs2; 3359 } 3360 break; 3361 case 3: 3362 if (scal) { 3363 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);CHKERRQ(ierr); 3364 } else { 3365 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);CHKERRQ(ierr); 3366 } 3367 if (!scal) { 3368 bb[0] = uthresh; 3369 bb[1] = PETSC_MAX_REAL; 3370 #if defined(PETSC_USE_COMPLEX) 3371 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3372 #else 3373 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3374 #endif 3375 } 3376 if (recipe_m > 0 && B_N - B_neigs > 0) { 3377 PetscBLASInt B_neigs2 = 0; 3378 3379 B_IL = 1; 3380 ierr = PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);CHKERRQ(ierr); 3381 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3382 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3383 #if defined(PETSC_USE_COMPLEX) 3384 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3385 #else 3386 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3387 #endif 3388 B_neigs += B_neigs2; 3389 } 3390 break; 3391 case 4: 3392 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3393 #if defined(PETSC_USE_COMPLEX) 3394 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3395 #else 3396 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3397 #endif 3398 { 3399 PetscBLASInt B_neigs2 = 0; 3400 3401 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3402 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3403 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3404 #if defined(PETSC_USE_COMPLEX) 3405 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3406 #else 3407 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3408 #endif 3409 B_neigs += B_neigs2; 3410 } 3411 break; 3412 case 5: /* same as before: first compute all eigenvalues, then filter */ 3413 #if defined(PETSC_USE_COMPLEX) 3414 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3415 #else 3416 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3417 #endif 3418 { 3419 PetscInt e,k,ne; 3420 for (e=0,ne=0;e<B_neigs;e++) { 3421 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3422 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3423 eigs[ne] = eigs[e]; 3424 ne++; 3425 } 3426 } 3427 ierr = PetscMemcpy(eigv,S,B_N*ne*sizeof(PetscScalar));CHKERRQ(ierr); 3428 B_neigs = ne; 3429 } 3430 break; 3431 default: 3432 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3433 break; 3434 } 3435 } 3436 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3437 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3438 B_IL = 1; 3439 #if defined(PETSC_USE_COMPLEX) 3440 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3441 #else 3442 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3443 #endif 3444 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3445 PetscInt k; 3446 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3447 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3448 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3449 nmin = nmax; 3450 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3451 for (k=0;k<nmax;k++) { 3452 eigs[k] = 1./PETSC_SMALL; 3453 eigv[k*(subset_size+1)] = 1.0; 3454 } 3455 } 3456 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3457 if (B_ierr) { 3458 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3459 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3460 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3461 } 3462 3463 if (B_neigs > nmax) { 3464 if (pcbddc->dbg_flag) { 3465 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax);CHKERRQ(ierr); 3466 } 3467 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3468 B_neigs = nmax; 3469 } 3470 3471 nmin_s = PetscMin(nmin,B_N); 3472 if (B_neigs < nmin_s) { 3473 PetscBLASInt B_neigs2 = 0; 3474 3475 if (pcbddc->use_deluxe_scaling) { 3476 if (scal) { 3477 B_IU = nmin_s; 3478 B_IL = B_neigs + 1; 3479 } else { 3480 B_IL = B_N - nmin_s + 1; 3481 B_IU = B_N - B_neigs; 3482 } 3483 } else { 3484 B_IL = B_neigs + 1; 3485 B_IU = nmin_s; 3486 } 3487 if (pcbddc->dbg_flag) { 3488 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3489 } 3490 if (sub_schurs->is_symmetric) { 3491 PetscInt j,k; 3492 for (j=0;j<subset_size;j++) { 3493 for (k=j;k<subset_size;k++) { 3494 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3495 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3496 } 3497 } 3498 } else { 3499 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3500 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3501 } 3502 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3503 #if defined(PETSC_USE_COMPLEX) 3504 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3505 #else 3506 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3507 #endif 3508 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3509 B_neigs += B_neigs2; 3510 } 3511 if (B_ierr) { 3512 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3513 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3514 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3515 } 3516 if (pcbddc->dbg_flag) { 3517 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3518 for (j=0;j<B_neigs;j++) { 3519 if (eigs[j] == 0.0) { 3520 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3521 } else { 3522 if (pcbddc->use_deluxe_scaling) { 3523 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3524 } else { 3525 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3526 } 3527 } 3528 } 3529 } 3530 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3531 } 3532 /* change the basis back to the original one */ 3533 if (sub_schurs->change) { 3534 Mat change,phi,phit; 3535 3536 if (pcbddc->dbg_flag > 2) { 3537 PetscInt ii; 3538 for (ii=0;ii<B_neigs;ii++) { 3539 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3540 for (j=0;j<B_N;j++) { 3541 #if defined(PETSC_USE_COMPLEX) 3542 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3543 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3544 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3545 #else 3546 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3547 #endif 3548 } 3549 } 3550 } 3551 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3552 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3553 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3554 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3555 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3556 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3557 } 3558 maxneigs = PetscMax(B_neigs,maxneigs); 3559 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3560 if (B_neigs) { 3561 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3562 3563 if (pcbddc->dbg_flag > 1) { 3564 PetscInt ii; 3565 for (ii=0;ii<B_neigs;ii++) { 3566 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3567 for (j=0;j<B_N;j++) { 3568 #if defined(PETSC_USE_COMPLEX) 3569 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3570 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3571 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3572 #else 3573 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3574 #endif 3575 } 3576 } 3577 } 3578 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3579 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3580 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3581 cum++; 3582 } 3583 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3584 /* shift for next computation */ 3585 cumarray += subset_size*subset_size; 3586 } 3587 if (pcbddc->dbg_flag) { 3588 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3589 } 3590 3591 if (mss) { 3592 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3593 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3594 /* destroy matrices (junk) */ 3595 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3596 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3597 } 3598 if (allocated_S_St) { 3599 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3600 } 3601 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3602 #if defined(PETSC_USE_COMPLEX) 3603 ierr = PetscFree(rwork);CHKERRQ(ierr); 3604 #endif 3605 if (pcbddc->dbg_flag) { 3606 PetscInt maxneigs_r; 3607 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3608 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3609 } 3610 PetscFunctionReturn(0); 3611 } 3612 3613 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3614 { 3615 PetscScalar *coarse_submat_vals; 3616 PetscErrorCode ierr; 3617 3618 PetscFunctionBegin; 3619 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3620 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3621 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3622 3623 /* Setup local neumann solver ksp_R */ 3624 /* PCBDDCSetUpLocalScatters should be called first! */ 3625 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3626 3627 /* 3628 Setup local correction and local part of coarse basis. 3629 Gives back the dense local part of the coarse matrix in column major ordering 3630 */ 3631 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3632 3633 /* Compute total number of coarse nodes and setup coarse solver */ 3634 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3635 3636 /* free */ 3637 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3638 PetscFunctionReturn(0); 3639 } 3640 3641 PetscErrorCode PCBDDCResetCustomization(PC pc) 3642 { 3643 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3644 PetscErrorCode ierr; 3645 3646 PetscFunctionBegin; 3647 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3648 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3649 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3650 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3651 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3652 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3653 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3654 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3655 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3656 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3657 PetscFunctionReturn(0); 3658 } 3659 3660 PetscErrorCode PCBDDCResetTopography(PC pc) 3661 { 3662 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3663 PetscInt i; 3664 PetscErrorCode ierr; 3665 3666 PetscFunctionBegin; 3667 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3668 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3669 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3670 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3671 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3672 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3673 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3674 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3675 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3676 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3677 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3678 for (i=0;i<pcbddc->n_local_subs;i++) { 3679 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3680 } 3681 pcbddc->n_local_subs = 0; 3682 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3683 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3684 pcbddc->graphanalyzed = PETSC_FALSE; 3685 pcbddc->recompute_topography = PETSC_TRUE; 3686 pcbddc->corner_selected = PETSC_FALSE; 3687 PetscFunctionReturn(0); 3688 } 3689 3690 PetscErrorCode PCBDDCResetSolvers(PC pc) 3691 { 3692 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3693 PetscErrorCode ierr; 3694 3695 PetscFunctionBegin; 3696 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3697 if (pcbddc->coarse_phi_B) { 3698 PetscScalar *array; 3699 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3700 ierr = PetscFree(array);CHKERRQ(ierr); 3701 } 3702 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3703 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3704 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3705 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3706 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3707 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3708 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3709 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3710 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3711 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3712 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3713 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3714 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3715 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3716 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3717 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3718 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3719 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3720 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3721 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3722 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3723 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3724 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3725 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3726 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3727 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3728 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3729 if (pcbddc->benign_zerodiag_subs) { 3730 PetscInt i; 3731 for (i=0;i<pcbddc->benign_n;i++) { 3732 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3733 } 3734 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3735 } 3736 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3737 PetscFunctionReturn(0); 3738 } 3739 3740 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3741 { 3742 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3743 PC_IS *pcis = (PC_IS*)pc->data; 3744 VecType impVecType; 3745 PetscInt n_constraints,n_R,old_size; 3746 PetscErrorCode ierr; 3747 3748 PetscFunctionBegin; 3749 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3750 n_R = pcis->n - pcbddc->n_vertices; 3751 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3752 /* local work vectors (try to avoid unneeded work)*/ 3753 /* R nodes */ 3754 old_size = -1; 3755 if (pcbddc->vec1_R) { 3756 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3757 } 3758 if (n_R != old_size) { 3759 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3760 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3761 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3762 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3763 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3764 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3765 } 3766 /* local primal dofs */ 3767 old_size = -1; 3768 if (pcbddc->vec1_P) { 3769 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3770 } 3771 if (pcbddc->local_primal_size != old_size) { 3772 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3773 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3774 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3775 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3776 } 3777 /* local explicit constraints */ 3778 old_size = -1; 3779 if (pcbddc->vec1_C) { 3780 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3781 } 3782 if (n_constraints && n_constraints != old_size) { 3783 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3784 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3785 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3786 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3787 } 3788 PetscFunctionReturn(0); 3789 } 3790 3791 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3792 { 3793 PetscErrorCode ierr; 3794 /* pointers to pcis and pcbddc */ 3795 PC_IS* pcis = (PC_IS*)pc->data; 3796 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3797 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3798 /* submatrices of local problem */ 3799 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3800 /* submatrices of local coarse problem */ 3801 Mat S_VV,S_CV,S_VC,S_CC; 3802 /* working matrices */ 3803 Mat C_CR; 3804 /* additional working stuff */ 3805 PC pc_R; 3806 Mat F,Brhs = NULL; 3807 Vec dummy_vec; 3808 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3809 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3810 PetscScalar *work; 3811 PetscInt *idx_V_B; 3812 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3813 PetscInt i,n_R,n_D,n_B; 3814 3815 /* some shortcuts to scalars */ 3816 PetscScalar one=1.0,m_one=-1.0; 3817 3818 PetscFunctionBegin; 3819 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3820 3821 /* Set Non-overlapping dimensions */ 3822 n_vertices = pcbddc->n_vertices; 3823 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3824 n_B = pcis->n_B; 3825 n_D = pcis->n - n_B; 3826 n_R = pcis->n - n_vertices; 3827 3828 /* vertices in boundary numbering */ 3829 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3830 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3831 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3832 3833 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3834 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3835 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3836 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3837 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3838 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3839 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3840 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3841 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3842 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3843 3844 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3845 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3846 ierr = PCSetUp(pc_R);CHKERRQ(ierr); 3847 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3848 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3849 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3850 lda_rhs = n_R; 3851 need_benign_correction = PETSC_FALSE; 3852 if (isLU || isILU || isCHOL) { 3853 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3854 } else if (sub_schurs && sub_schurs->reuse_solver) { 3855 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3856 MatFactorType type; 3857 3858 F = reuse_solver->F; 3859 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3860 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3861 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3862 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3863 } else { 3864 F = NULL; 3865 } 3866 3867 /* determine if we can use a sparse right-hand side */ 3868 sparserhs = PETSC_FALSE; 3869 if (F) { 3870 MatSolverType solver; 3871 3872 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3873 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3874 } 3875 3876 /* allocate workspace */ 3877 n = 0; 3878 if (n_constraints) { 3879 n += lda_rhs*n_constraints; 3880 } 3881 if (n_vertices) { 3882 n = PetscMax(2*lda_rhs*n_vertices,n); 3883 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3884 } 3885 if (!pcbddc->symmetric_primal) { 3886 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3887 } 3888 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3889 3890 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3891 dummy_vec = NULL; 3892 if (need_benign_correction && lda_rhs != n_R && F) { 3893 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3894 } 3895 3896 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3897 if (n_constraints) { 3898 Mat M3,C_B; 3899 IS is_aux; 3900 PetscScalar *array,*array2; 3901 3902 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3903 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3904 3905 /* Extract constraints on R nodes: C_{CR} */ 3906 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3907 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3908 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3909 3910 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3911 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3912 if (!sparserhs) { 3913 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3914 for (i=0;i<n_constraints;i++) { 3915 const PetscScalar *row_cmat_values; 3916 const PetscInt *row_cmat_indices; 3917 PetscInt size_of_constraint,j; 3918 3919 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3920 for (j=0;j<size_of_constraint;j++) { 3921 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3922 } 3923 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3924 } 3925 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3926 } else { 3927 Mat tC_CR; 3928 3929 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3930 if (lda_rhs != n_R) { 3931 PetscScalar *aa; 3932 PetscInt r,*ii,*jj; 3933 PetscBool done; 3934 3935 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3936 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 3937 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3938 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3939 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3940 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 3941 } else { 3942 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3943 tC_CR = C_CR; 3944 } 3945 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3946 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3947 } 3948 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3949 if (F) { 3950 if (need_benign_correction) { 3951 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3952 3953 /* rhs is already zero on interior dofs, no need to change the rhs */ 3954 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3955 } 3956 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3957 if (need_benign_correction) { 3958 PetscScalar *marr; 3959 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3960 3961 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3962 if (lda_rhs != n_R) { 3963 for (i=0;i<n_constraints;i++) { 3964 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3965 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3966 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3967 } 3968 } else { 3969 for (i=0;i<n_constraints;i++) { 3970 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3971 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3972 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3973 } 3974 } 3975 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3976 } 3977 } else { 3978 PetscScalar *marr; 3979 3980 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3981 for (i=0;i<n_constraints;i++) { 3982 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3983 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3984 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3985 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3986 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3987 } 3988 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3989 } 3990 if (sparserhs) { 3991 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3992 } 3993 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3994 if (!pcbddc->switch_static) { 3995 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3996 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3997 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3998 for (i=0;i<n_constraints;i++) { 3999 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 4000 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 4001 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4002 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4003 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4004 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4005 } 4006 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 4007 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 4008 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4009 } else { 4010 if (lda_rhs != n_R) { 4011 IS dummy; 4012 4013 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 4014 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4015 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 4016 } else { 4017 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 4018 pcbddc->local_auxmat2 = local_auxmat2_R; 4019 } 4020 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4021 } 4022 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4023 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 4024 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 4025 if (isCHOL) { 4026 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 4027 } else { 4028 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 4029 } 4030 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 4031 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4032 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4033 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4034 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4035 ierr = MatDestroy(&M3);CHKERRQ(ierr); 4036 } 4037 4038 /* Get submatrices from subdomain matrix */ 4039 if (n_vertices) { 4040 IS is_aux; 4041 PetscBool isseqaij; 4042 4043 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4044 IS tis; 4045 4046 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 4047 ierr = ISSort(tis);CHKERRQ(ierr); 4048 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 4049 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4050 } else { 4051 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 4052 } 4053 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 4054 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 4055 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4056 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 4057 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4058 } 4059 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 4060 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4061 } 4062 4063 /* Matrix of coarse basis functions (local) */ 4064 if (pcbddc->coarse_phi_B) { 4065 PetscInt on_B,on_primal,on_D=n_D; 4066 if (pcbddc->coarse_phi_D) { 4067 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 4068 } 4069 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 4070 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4071 PetscScalar *marray; 4072 4073 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 4074 ierr = PetscFree(marray);CHKERRQ(ierr); 4075 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4076 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4077 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4078 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4079 } 4080 } 4081 4082 if (!pcbddc->coarse_phi_B) { 4083 PetscScalar *marr; 4084 4085 /* memory size */ 4086 n = n_B*pcbddc->local_primal_size; 4087 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4088 if (!pcbddc->symmetric_primal) n *= 2; 4089 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 4090 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4091 marr += n_B*pcbddc->local_primal_size; 4092 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4093 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4094 marr += n_D*pcbddc->local_primal_size; 4095 } 4096 if (!pcbddc->symmetric_primal) { 4097 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4098 marr += n_B*pcbddc->local_primal_size; 4099 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4100 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4101 } 4102 } else { 4103 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 4104 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4105 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4106 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 4107 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4108 } 4109 } 4110 } 4111 4112 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4113 p0_lidx_I = NULL; 4114 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4115 const PetscInt *idxs; 4116 4117 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4118 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 4119 for (i=0;i<pcbddc->benign_n;i++) { 4120 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 4121 } 4122 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4123 } 4124 4125 /* vertices */ 4126 if (n_vertices) { 4127 PetscBool restoreavr = PETSC_FALSE; 4128 4129 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 4130 4131 if (n_R) { 4132 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4133 PetscBLASInt B_N,B_one = 1; 4134 PetscScalar *x,*y; 4135 4136 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 4137 if (need_benign_correction) { 4138 ISLocalToGlobalMapping RtoN; 4139 IS is_p0; 4140 PetscInt *idxs_p0,n; 4141 4142 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 4143 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 4144 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 4145 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 4146 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 4147 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 4148 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 4149 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 4150 } 4151 4152 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4153 if (!sparserhs || need_benign_correction) { 4154 if (lda_rhs == n_R) { 4155 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4156 } else { 4157 PetscScalar *av,*array; 4158 const PetscInt *xadj,*adjncy; 4159 PetscInt n; 4160 PetscBool flg_row; 4161 4162 array = work+lda_rhs*n_vertices; 4163 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4164 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4165 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4166 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 4167 for (i=0;i<n;i++) { 4168 PetscInt j; 4169 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4170 } 4171 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4172 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4173 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 4174 } 4175 if (need_benign_correction) { 4176 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4177 PetscScalar *marr; 4178 4179 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 4180 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4181 4182 | 0 0 0 | (V) 4183 L = | 0 0 -1 | (P-p0) 4184 | 0 0 -1 | (p0) 4185 4186 */ 4187 for (i=0;i<reuse_solver->benign_n;i++) { 4188 const PetscScalar *vals; 4189 const PetscInt *idxs,*idxs_zero; 4190 PetscInt n,j,nz; 4191 4192 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4193 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4194 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4195 for (j=0;j<n;j++) { 4196 PetscScalar val = vals[j]; 4197 PetscInt k,col = idxs[j]; 4198 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4199 } 4200 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4201 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4202 } 4203 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 4204 } 4205 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 4206 Brhs = A_RV; 4207 } else { 4208 Mat tA_RVT,A_RVT; 4209 4210 if (!pcbddc->symmetric_primal) { 4211 /* A_RV already scaled by -1 */ 4212 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 4213 } else { 4214 restoreavr = PETSC_TRUE; 4215 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4216 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 4217 A_RVT = A_VR; 4218 } 4219 if (lda_rhs != n_R) { 4220 PetscScalar *aa; 4221 PetscInt r,*ii,*jj; 4222 PetscBool done; 4223 4224 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4225 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4226 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4227 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4228 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4229 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4230 } else { 4231 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4232 tA_RVT = A_RVT; 4233 } 4234 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4235 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4236 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4237 } 4238 if (F) { 4239 /* need to correct the rhs */ 4240 if (need_benign_correction) { 4241 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4242 PetscScalar *marr; 4243 4244 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4245 if (lda_rhs != n_R) { 4246 for (i=0;i<n_vertices;i++) { 4247 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4248 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4249 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4250 } 4251 } else { 4252 for (i=0;i<n_vertices;i++) { 4253 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4254 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4255 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4256 } 4257 } 4258 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4259 } 4260 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4261 if (restoreavr) { 4262 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4263 } 4264 /* need to correct the solution */ 4265 if (need_benign_correction) { 4266 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4267 PetscScalar *marr; 4268 4269 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4270 if (lda_rhs != n_R) { 4271 for (i=0;i<n_vertices;i++) { 4272 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4273 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4274 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4275 } 4276 } else { 4277 for (i=0;i<n_vertices;i++) { 4278 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4279 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4280 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4281 } 4282 } 4283 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4284 } 4285 } else { 4286 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4287 for (i=0;i<n_vertices;i++) { 4288 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4289 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4290 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4291 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4292 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4293 } 4294 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4295 } 4296 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4297 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4298 /* S_VV and S_CV */ 4299 if (n_constraints) { 4300 Mat B; 4301 4302 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4303 for (i=0;i<n_vertices;i++) { 4304 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4305 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4306 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4307 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4308 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4309 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4310 } 4311 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4312 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 4313 ierr = MatDestroy(&B);CHKERRQ(ierr); 4314 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4315 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4316 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4317 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4318 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4319 ierr = MatDestroy(&B);CHKERRQ(ierr); 4320 } 4321 if (lda_rhs != n_R) { 4322 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4323 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4324 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4325 } 4326 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4327 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4328 if (need_benign_correction) { 4329 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4330 PetscScalar *marr,*sums; 4331 4332 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4333 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4334 for (i=0;i<reuse_solver->benign_n;i++) { 4335 const PetscScalar *vals; 4336 const PetscInt *idxs,*idxs_zero; 4337 PetscInt n,j,nz; 4338 4339 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4340 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4341 for (j=0;j<n_vertices;j++) { 4342 PetscInt k; 4343 sums[j] = 0.; 4344 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4345 } 4346 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4347 for (j=0;j<n;j++) { 4348 PetscScalar val = vals[j]; 4349 PetscInt k; 4350 for (k=0;k<n_vertices;k++) { 4351 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4352 } 4353 } 4354 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4355 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4356 } 4357 ierr = PetscFree(sums);CHKERRQ(ierr); 4358 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4359 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4360 } 4361 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4362 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4363 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 4364 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4365 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4366 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 4367 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4368 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4369 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4370 } else { 4371 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4372 } 4373 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4374 4375 /* coarse basis functions */ 4376 for (i=0;i<n_vertices;i++) { 4377 PetscScalar *y; 4378 4379 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4380 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4381 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4382 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4383 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4384 y[n_B*i+idx_V_B[i]] = 1.0; 4385 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4386 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4387 4388 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4389 PetscInt j; 4390 4391 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4392 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4393 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4394 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4395 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4396 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4397 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4398 } 4399 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4400 } 4401 /* if n_R == 0 the object is not destroyed */ 4402 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4403 } 4404 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4405 4406 if (n_constraints) { 4407 Mat B; 4408 4409 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4410 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4411 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4412 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4413 if (n_vertices) { 4414 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4415 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4416 } else { 4417 Mat S_VCt; 4418 4419 if (lda_rhs != n_R) { 4420 ierr = MatDestroy(&B);CHKERRQ(ierr); 4421 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4422 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4423 } 4424 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4425 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4426 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4427 } 4428 } 4429 ierr = MatDestroy(&B);CHKERRQ(ierr); 4430 /* coarse basis functions */ 4431 for (i=0;i<n_constraints;i++) { 4432 PetscScalar *y; 4433 4434 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4435 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4436 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4437 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4438 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4439 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4440 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4441 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4442 PetscInt j; 4443 4444 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4445 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4446 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4447 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4448 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4449 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4450 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4451 } 4452 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4453 } 4454 } 4455 if (n_constraints) { 4456 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4457 } 4458 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4459 4460 /* coarse matrix entries relative to B_0 */ 4461 if (pcbddc->benign_n) { 4462 Mat B0_B,B0_BPHI; 4463 IS is_dummy; 4464 PetscScalar *data; 4465 PetscInt j; 4466 4467 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4468 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4469 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4470 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4471 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4472 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4473 for (j=0;j<pcbddc->benign_n;j++) { 4474 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4475 for (i=0;i<pcbddc->local_primal_size;i++) { 4476 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4477 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4478 } 4479 } 4480 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4481 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4482 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4483 } 4484 4485 /* compute other basis functions for non-symmetric problems */ 4486 if (!pcbddc->symmetric_primal) { 4487 Mat B_V=NULL,B_C=NULL; 4488 PetscScalar *marray; 4489 4490 if (n_constraints) { 4491 Mat S_CCT,C_CRT; 4492 4493 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4494 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4495 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4496 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4497 if (n_vertices) { 4498 Mat S_VCT; 4499 4500 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4501 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4502 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4503 } 4504 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4505 } else { 4506 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4507 } 4508 if (n_vertices && n_R) { 4509 PetscScalar *av,*marray; 4510 const PetscInt *xadj,*adjncy; 4511 PetscInt n; 4512 PetscBool flg_row; 4513 4514 /* B_V = B_V - A_VR^T */ 4515 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4516 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4517 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4518 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4519 for (i=0;i<n;i++) { 4520 PetscInt j; 4521 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4522 } 4523 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4524 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4525 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4526 } 4527 4528 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4529 if (n_vertices) { 4530 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4531 for (i=0;i<n_vertices;i++) { 4532 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4533 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4534 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4535 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4536 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4537 } 4538 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4539 } 4540 if (B_C) { 4541 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4542 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4543 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4544 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4545 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4546 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4547 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4548 } 4549 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4550 } 4551 /* coarse basis functions */ 4552 for (i=0;i<pcbddc->local_primal_size;i++) { 4553 PetscScalar *y; 4554 4555 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4556 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4557 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4558 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4559 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4560 if (i<n_vertices) { 4561 y[n_B*i+idx_V_B[i]] = 1.0; 4562 } 4563 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4564 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4565 4566 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4567 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4568 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4569 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4570 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4571 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4572 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4573 } 4574 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4575 } 4576 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4577 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4578 } 4579 4580 /* free memory */ 4581 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4582 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4583 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4584 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4585 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4586 ierr = PetscFree(work);CHKERRQ(ierr); 4587 if (n_vertices) { 4588 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4589 } 4590 if (n_constraints) { 4591 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4592 } 4593 /* Checking coarse_sub_mat and coarse basis functios */ 4594 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4595 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4596 if (pcbddc->dbg_flag) { 4597 Mat coarse_sub_mat; 4598 Mat AUXMAT,TM1,TM2,TM3,TM4; 4599 Mat coarse_phi_D,coarse_phi_B; 4600 Mat coarse_psi_D,coarse_psi_B; 4601 Mat A_II,A_BB,A_IB,A_BI; 4602 Mat C_B,CPHI; 4603 IS is_dummy; 4604 Vec mones; 4605 MatType checkmattype=MATSEQAIJ; 4606 PetscReal real_value; 4607 4608 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4609 Mat A; 4610 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4611 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4612 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4613 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4614 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4615 ierr = MatDestroy(&A);CHKERRQ(ierr); 4616 } else { 4617 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4618 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4619 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4620 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4621 } 4622 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4623 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4624 if (!pcbddc->symmetric_primal) { 4625 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4626 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4627 } 4628 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4629 4630 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4631 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4632 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4633 if (!pcbddc->symmetric_primal) { 4634 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4635 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4636 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4637 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4638 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4639 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4640 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4641 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4642 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4643 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4644 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4645 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4646 } else { 4647 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4648 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4649 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4650 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4651 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4652 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4653 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4654 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4655 } 4656 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4657 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4658 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4659 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4660 if (pcbddc->benign_n) { 4661 Mat B0_B,B0_BPHI; 4662 PetscScalar *data,*data2; 4663 PetscInt j; 4664 4665 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4666 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4667 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4668 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4669 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4670 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4671 for (j=0;j<pcbddc->benign_n;j++) { 4672 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4673 for (i=0;i<pcbddc->local_primal_size;i++) { 4674 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4675 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4676 } 4677 } 4678 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4679 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4680 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4681 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4682 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4683 } 4684 #if 0 4685 { 4686 PetscViewer viewer; 4687 char filename[256]; 4688 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4689 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4690 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4691 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4692 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4693 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4694 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4695 if (pcbddc->coarse_phi_B) { 4696 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4697 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4698 } 4699 if (pcbddc->coarse_phi_D) { 4700 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4701 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4702 } 4703 if (pcbddc->coarse_psi_B) { 4704 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4705 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4706 } 4707 if (pcbddc->coarse_psi_D) { 4708 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4709 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4710 } 4711 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4712 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4713 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4714 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4715 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4716 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4717 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4718 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4719 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4720 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4721 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4722 } 4723 #endif 4724 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4725 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4726 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4727 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4728 4729 /* check constraints */ 4730 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4731 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4732 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4733 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4734 } else { 4735 PetscScalar *data; 4736 Mat tmat; 4737 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4738 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4739 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4740 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4741 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4742 } 4743 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4744 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4745 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4746 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4747 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4748 if (!pcbddc->symmetric_primal) { 4749 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4750 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4751 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4752 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4753 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4754 } 4755 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4756 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4757 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4758 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4759 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4760 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4761 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4762 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4763 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4764 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4765 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4766 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4767 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4768 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4769 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4770 if (!pcbddc->symmetric_primal) { 4771 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4772 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4773 } 4774 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4775 } 4776 /* get back data */ 4777 *coarse_submat_vals_n = coarse_submat_vals; 4778 PetscFunctionReturn(0); 4779 } 4780 4781 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4782 { 4783 Mat *work_mat; 4784 IS isrow_s,iscol_s; 4785 PetscBool rsorted,csorted; 4786 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4787 PetscErrorCode ierr; 4788 4789 PetscFunctionBegin; 4790 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4791 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4792 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4793 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4794 4795 if (!rsorted) { 4796 const PetscInt *idxs; 4797 PetscInt *idxs_sorted,i; 4798 4799 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4800 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4801 for (i=0;i<rsize;i++) { 4802 idxs_perm_r[i] = i; 4803 } 4804 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4805 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4806 for (i=0;i<rsize;i++) { 4807 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4808 } 4809 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4810 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4811 } else { 4812 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4813 isrow_s = isrow; 4814 } 4815 4816 if (!csorted) { 4817 if (isrow == iscol) { 4818 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4819 iscol_s = isrow_s; 4820 } else { 4821 const PetscInt *idxs; 4822 PetscInt *idxs_sorted,i; 4823 4824 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4825 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4826 for (i=0;i<csize;i++) { 4827 idxs_perm_c[i] = i; 4828 } 4829 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4830 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4831 for (i=0;i<csize;i++) { 4832 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4833 } 4834 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4835 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4836 } 4837 } else { 4838 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4839 iscol_s = iscol; 4840 } 4841 4842 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4843 4844 if (!rsorted || !csorted) { 4845 Mat new_mat; 4846 IS is_perm_r,is_perm_c; 4847 4848 if (!rsorted) { 4849 PetscInt *idxs_r,i; 4850 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4851 for (i=0;i<rsize;i++) { 4852 idxs_r[idxs_perm_r[i]] = i; 4853 } 4854 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4855 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4856 } else { 4857 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4858 } 4859 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4860 4861 if (!csorted) { 4862 if (isrow_s == iscol_s) { 4863 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4864 is_perm_c = is_perm_r; 4865 } else { 4866 PetscInt *idxs_c,i; 4867 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4868 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4869 for (i=0;i<csize;i++) { 4870 idxs_c[idxs_perm_c[i]] = i; 4871 } 4872 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4873 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4874 } 4875 } else { 4876 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4877 } 4878 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4879 4880 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4881 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4882 work_mat[0] = new_mat; 4883 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4884 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4885 } 4886 4887 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4888 *B = work_mat[0]; 4889 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4890 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4891 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4892 PetscFunctionReturn(0); 4893 } 4894 4895 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4896 { 4897 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4898 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4899 Mat new_mat,lA; 4900 IS is_local,is_global; 4901 PetscInt local_size; 4902 PetscBool isseqaij; 4903 PetscErrorCode ierr; 4904 4905 PetscFunctionBegin; 4906 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4907 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4908 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4909 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4910 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4911 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4912 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4913 4914 /* check */ 4915 if (pcbddc->dbg_flag) { 4916 Vec x,x_change; 4917 PetscReal error; 4918 4919 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4920 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4921 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4922 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4923 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4924 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4925 if (!pcbddc->change_interior) { 4926 const PetscScalar *x,*y,*v; 4927 PetscReal lerror = 0.; 4928 PetscInt i; 4929 4930 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4931 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4932 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4933 for (i=0;i<local_size;i++) 4934 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4935 lerror = PetscAbsScalar(x[i]-y[i]); 4936 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4937 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4938 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4939 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4940 if (error > PETSC_SMALL) { 4941 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4942 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4943 } else { 4944 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4945 } 4946 } 4947 } 4948 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4949 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4950 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4951 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4952 if (error > PETSC_SMALL) { 4953 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4954 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4955 } else { 4956 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4957 } 4958 } 4959 ierr = VecDestroy(&x);CHKERRQ(ierr); 4960 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4961 } 4962 4963 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4964 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4965 4966 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4967 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4968 if (isseqaij) { 4969 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4970 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4971 if (lA) { 4972 Mat work; 4973 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4974 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4975 ierr = MatDestroy(&work);CHKERRQ(ierr); 4976 } 4977 } else { 4978 Mat work_mat; 4979 4980 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4981 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4982 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4983 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4984 if (lA) { 4985 Mat work; 4986 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4987 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4988 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4989 ierr = MatDestroy(&work);CHKERRQ(ierr); 4990 } 4991 } 4992 if (matis->A->symmetric_set) { 4993 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4994 #if !defined(PETSC_USE_COMPLEX) 4995 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4996 #endif 4997 } 4998 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4999 PetscFunctionReturn(0); 5000 } 5001 5002 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5003 { 5004 PC_IS* pcis = (PC_IS*)(pc->data); 5005 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5006 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5007 PetscInt *idx_R_local=NULL; 5008 PetscInt n_vertices,i,j,n_R,n_D,n_B; 5009 PetscInt vbs,bs; 5010 PetscBT bitmask=NULL; 5011 PetscErrorCode ierr; 5012 5013 PetscFunctionBegin; 5014 /* 5015 No need to setup local scatters if 5016 - primal space is unchanged 5017 AND 5018 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5019 AND 5020 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5021 */ 5022 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5023 PetscFunctionReturn(0); 5024 } 5025 /* destroy old objects */ 5026 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 5027 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 5028 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 5029 /* Set Non-overlapping dimensions */ 5030 n_B = pcis->n_B; 5031 n_D = pcis->n - n_B; 5032 n_vertices = pcbddc->n_vertices; 5033 5034 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5035 5036 /* create auxiliary bitmask and allocate workspace */ 5037 if (!sub_schurs || !sub_schurs->reuse_solver) { 5038 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 5039 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 5040 for (i=0;i<n_vertices;i++) { 5041 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 5042 } 5043 5044 for (i=0, n_R=0; i<pcis->n; i++) { 5045 if (!PetscBTLookup(bitmask,i)) { 5046 idx_R_local[n_R++] = i; 5047 } 5048 } 5049 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5050 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5051 5052 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5053 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 5054 } 5055 5056 /* Block code */ 5057 vbs = 1; 5058 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 5059 if (bs>1 && !(n_vertices%bs)) { 5060 PetscBool is_blocked = PETSC_TRUE; 5061 PetscInt *vary; 5062 if (!sub_schurs || !sub_schurs->reuse_solver) { 5063 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 5064 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 5065 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5066 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5067 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5068 for (i=0; i<pcis->n/bs; i++) { 5069 if (vary[i]!=0 && vary[i]!=bs) { 5070 is_blocked = PETSC_FALSE; 5071 break; 5072 } 5073 } 5074 ierr = PetscFree(vary);CHKERRQ(ierr); 5075 } else { 5076 /* Verify directly the R set */ 5077 for (i=0; i<n_R/bs; i++) { 5078 PetscInt j,node=idx_R_local[bs*i]; 5079 for (j=1; j<bs; j++) { 5080 if (node != idx_R_local[bs*i+j]-j) { 5081 is_blocked = PETSC_FALSE; 5082 break; 5083 } 5084 } 5085 } 5086 } 5087 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5088 vbs = bs; 5089 for (i=0;i<n_R/vbs;i++) { 5090 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5091 } 5092 } 5093 } 5094 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 5095 if (sub_schurs && sub_schurs->reuse_solver) { 5096 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5097 5098 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5099 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 5100 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 5101 reuse_solver->is_R = pcbddc->is_R_local; 5102 } else { 5103 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 5104 } 5105 5106 /* print some info if requested */ 5107 if (pcbddc->dbg_flag) { 5108 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5109 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5110 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5111 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 5112 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 5113 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 5114 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5115 } 5116 5117 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5118 if (!sub_schurs || !sub_schurs->reuse_solver) { 5119 IS is_aux1,is_aux2; 5120 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5121 5122 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5123 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 5124 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 5125 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5126 for (i=0; i<n_D; i++) { 5127 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 5128 } 5129 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5130 for (i=0, j=0; i<n_R; i++) { 5131 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5132 aux_array1[j++] = i; 5133 } 5134 } 5135 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5136 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5137 for (i=0, j=0; i<n_B; i++) { 5138 if (!PetscBTLookup(bitmask,is_indices[i])) { 5139 aux_array2[j++] = i; 5140 } 5141 } 5142 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5143 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 5144 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 5145 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5146 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 5147 5148 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5149 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 5150 for (i=0, j=0; i<n_R; i++) { 5151 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5152 aux_array1[j++] = i; 5153 } 5154 } 5155 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5156 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5157 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5158 } 5159 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 5160 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5161 } else { 5162 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5163 IS tis; 5164 PetscInt schur_size; 5165 5166 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 5167 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 5168 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 5169 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5170 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5171 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 5172 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5173 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5174 } 5175 } 5176 PetscFunctionReturn(0); 5177 } 5178 5179 5180 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5181 { 5182 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5183 PC_IS *pcis = (PC_IS*)pc->data; 5184 PC pc_temp; 5185 Mat A_RR; 5186 MatReuse reuse; 5187 PetscScalar m_one = -1.0; 5188 PetscReal value; 5189 PetscInt n_D,n_R; 5190 PetscBool check_corr,issbaij; 5191 PetscErrorCode ierr; 5192 /* prefixes stuff */ 5193 char dir_prefix[256],neu_prefix[256],str_level[16]; 5194 size_t len; 5195 5196 PetscFunctionBegin; 5197 5198 /* compute prefixes */ 5199 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 5200 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 5201 if (!pcbddc->current_level) { 5202 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));CHKERRQ(ierr); 5203 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));CHKERRQ(ierr); 5204 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5205 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5206 } else { 5207 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 5208 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5209 len -= 15; /* remove "pc_bddc_coarse_" */ 5210 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5211 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5212 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5213 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5214 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5215 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5216 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5217 ierr = PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));CHKERRQ(ierr); 5218 ierr = PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));CHKERRQ(ierr); 5219 } 5220 5221 /* DIRICHLET PROBLEM */ 5222 if (dirichlet) { 5223 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5224 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5225 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 5226 if (pcbddc->dbg_flag) { 5227 Mat A_IIn; 5228 5229 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5230 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5231 pcis->A_II = A_IIn; 5232 } 5233 } 5234 if (pcbddc->local_mat->symmetric_set) { 5235 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5236 } 5237 /* Matrix for Dirichlet problem is pcis->A_II */ 5238 n_D = pcis->n - pcis->n_B; 5239 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5240 void (*f)(void) = 0; 5241 5242 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5243 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5244 /* default */ 5245 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5246 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5247 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5248 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5249 if (issbaij) { 5250 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5251 } else { 5252 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5253 } 5254 /* Allow user's customization */ 5255 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5256 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5257 if (f && pcbddc->mat_graph->cloc) { 5258 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5259 const PetscInt *idxs; 5260 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5261 5262 ierr = ISGetLocalSize(pcis->is_I_local,&nl);CHKERRQ(ierr); 5263 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5264 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5265 for (i=0;i<nl;i++) { 5266 for (d=0;d<cdim;d++) { 5267 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5268 } 5269 } 5270 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5271 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5272 ierr = PetscFree(scoords);CHKERRQ(ierr); 5273 } 5274 } 5275 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 5276 if (sub_schurs && sub_schurs->reuse_solver) { 5277 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5278 5279 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5280 } 5281 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5282 if (!n_D) { 5283 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5284 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5285 } 5286 /* set ksp_D into pcis data */ 5287 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5288 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5289 pcis->ksp_D = pcbddc->ksp_D; 5290 } 5291 5292 /* NEUMANN PROBLEM */ 5293 A_RR = 0; 5294 if (neumann) { 5295 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5296 PetscInt ibs,mbs; 5297 PetscBool issbaij, reuse_neumann_solver; 5298 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5299 5300 reuse_neumann_solver = PETSC_FALSE; 5301 if (sub_schurs && sub_schurs->reuse_solver) { 5302 IS iP; 5303 5304 reuse_neumann_solver = PETSC_TRUE; 5305 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5306 if (iP) reuse_neumann_solver = PETSC_FALSE; 5307 } 5308 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5309 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5310 if (pcbddc->ksp_R) { /* already created ksp */ 5311 PetscInt nn_R; 5312 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5313 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5314 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5315 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5316 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5317 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5318 reuse = MAT_INITIAL_MATRIX; 5319 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5320 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5321 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5322 reuse = MAT_INITIAL_MATRIX; 5323 } else { /* safe to reuse the matrix */ 5324 reuse = MAT_REUSE_MATRIX; 5325 } 5326 } 5327 /* last check */ 5328 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5329 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5330 reuse = MAT_INITIAL_MATRIX; 5331 } 5332 } else { /* first time, so we need to create the matrix */ 5333 reuse = MAT_INITIAL_MATRIX; 5334 } 5335 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 5336 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5337 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5338 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5339 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5340 if (matis->A == pcbddc->local_mat) { 5341 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5342 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5343 } else { 5344 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5345 } 5346 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5347 if (matis->A == pcbddc->local_mat) { 5348 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5349 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5350 } else { 5351 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5352 } 5353 } 5354 /* extract A_RR */ 5355 if (reuse_neumann_solver) { 5356 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5357 5358 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5359 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5360 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5361 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5362 } else { 5363 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5364 } 5365 } else { 5366 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5367 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5368 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5369 } 5370 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5371 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5372 } 5373 if (pcbddc->local_mat->symmetric_set) { 5374 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5375 } 5376 if (!pcbddc->ksp_R) { /* create object if not present */ 5377 void (*f)(void) = 0; 5378 5379 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5380 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5381 /* default */ 5382 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5383 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5384 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5385 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5386 if (issbaij) { 5387 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5388 } else { 5389 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5390 } 5391 /* Allow user's customization */ 5392 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5393 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5394 if (f && pcbddc->mat_graph->cloc) { 5395 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5396 const PetscInt *idxs; 5397 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5398 5399 ierr = ISGetLocalSize(pcbddc->is_R_local,&nl);CHKERRQ(ierr); 5400 ierr = ISGetIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5401 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5402 for (i=0;i<nl;i++) { 5403 for (d=0;d<cdim;d++) { 5404 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5405 } 5406 } 5407 ierr = ISRestoreIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5408 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5409 ierr = PetscFree(scoords);CHKERRQ(ierr); 5410 } 5411 } 5412 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5413 if (!n_R) { 5414 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5415 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5416 } 5417 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5418 /* Reuse solver if it is present */ 5419 if (reuse_neumann_solver) { 5420 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5421 5422 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5423 } 5424 } 5425 5426 if (pcbddc->dbg_flag) { 5427 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5428 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5429 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5430 } 5431 5432 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5433 check_corr = PETSC_FALSE; 5434 if (pcbddc->NullSpace_corr[0]) { 5435 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5436 } 5437 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5438 check_corr = PETSC_TRUE; 5439 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5440 } 5441 if (neumann && pcbddc->NullSpace_corr[2]) { 5442 check_corr = PETSC_TRUE; 5443 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5444 } 5445 /* check Dirichlet and Neumann solvers */ 5446 if (pcbddc->dbg_flag) { 5447 if (dirichlet) { /* Dirichlet */ 5448 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5449 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5450 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5451 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5452 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5453 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5454 if (check_corr) { 5455 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 5456 } 5457 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5458 } 5459 if (neumann) { /* Neumann */ 5460 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5461 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5462 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5463 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5464 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5465 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5466 if (check_corr) { 5467 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 5468 } 5469 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5470 } 5471 } 5472 /* free Neumann problem's matrix */ 5473 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5474 PetscFunctionReturn(0); 5475 } 5476 5477 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5478 { 5479 PetscErrorCode ierr; 5480 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5481 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5482 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5483 5484 PetscFunctionBegin; 5485 if (!reuse_solver) { 5486 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5487 } 5488 if (!pcbddc->switch_static) { 5489 if (applytranspose && pcbddc->local_auxmat1) { 5490 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5491 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5492 } 5493 if (!reuse_solver) { 5494 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5495 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5496 } else { 5497 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5498 5499 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5500 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5501 } 5502 } else { 5503 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5504 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5505 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5506 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5507 if (applytranspose && pcbddc->local_auxmat1) { 5508 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5509 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5510 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5511 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5512 } 5513 } 5514 if (!reuse_solver || pcbddc->switch_static) { 5515 if (applytranspose) { 5516 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5517 } else { 5518 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5519 } 5520 } else { 5521 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5522 5523 if (applytranspose) { 5524 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5525 } else { 5526 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5527 } 5528 } 5529 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5530 if (!pcbddc->switch_static) { 5531 if (!reuse_solver) { 5532 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5533 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5534 } else { 5535 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5536 5537 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5538 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5539 } 5540 if (!applytranspose && pcbddc->local_auxmat1) { 5541 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5542 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5543 } 5544 } else { 5545 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5546 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5547 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5548 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5549 if (!applytranspose && pcbddc->local_auxmat1) { 5550 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5551 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5552 } 5553 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5554 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5555 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5556 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5557 } 5558 PetscFunctionReturn(0); 5559 } 5560 5561 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5562 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5563 { 5564 PetscErrorCode ierr; 5565 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5566 PC_IS* pcis = (PC_IS*) (pc->data); 5567 const PetscScalar zero = 0.0; 5568 5569 PetscFunctionBegin; 5570 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5571 if (!pcbddc->benign_apply_coarse_only) { 5572 if (applytranspose) { 5573 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5574 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5575 } else { 5576 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5577 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5578 } 5579 } else { 5580 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5581 } 5582 5583 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5584 if (pcbddc->benign_n) { 5585 PetscScalar *array; 5586 PetscInt j; 5587 5588 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5589 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5590 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5591 } 5592 5593 /* start communications from local primal nodes to rhs of coarse solver */ 5594 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5595 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5596 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5597 5598 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5599 if (pcbddc->coarse_ksp) { 5600 Mat coarse_mat; 5601 Vec rhs,sol; 5602 MatNullSpace nullsp; 5603 PetscBool isbddc = PETSC_FALSE; 5604 5605 if (pcbddc->benign_have_null) { 5606 PC coarse_pc; 5607 5608 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5609 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5610 /* we need to propagate to coarser levels the need for a possible benign correction */ 5611 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5612 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5613 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5614 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5615 } 5616 } 5617 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5618 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5619 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5620 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5621 if (nullsp) { 5622 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5623 } 5624 if (applytranspose) { 5625 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5626 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5627 } else { 5628 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5629 PC coarse_pc; 5630 5631 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5632 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5633 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5634 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5635 } else { 5636 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5637 } 5638 } 5639 /* we don't need the benign correction at coarser levels anymore */ 5640 if (pcbddc->benign_have_null && isbddc) { 5641 PC coarse_pc; 5642 PC_BDDC* coarsepcbddc; 5643 5644 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5645 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5646 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5647 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5648 } 5649 if (nullsp) { 5650 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5651 } 5652 } 5653 5654 /* Local solution on R nodes */ 5655 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5656 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5657 } 5658 /* communications from coarse sol to local primal nodes */ 5659 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5660 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5661 5662 /* Sum contributions from the two levels */ 5663 if (!pcbddc->benign_apply_coarse_only) { 5664 if (applytranspose) { 5665 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5666 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5667 } else { 5668 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5669 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5670 } 5671 /* store p0 */ 5672 if (pcbddc->benign_n) { 5673 PetscScalar *array; 5674 PetscInt j; 5675 5676 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5677 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5678 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5679 } 5680 } else { /* expand the coarse solution */ 5681 if (applytranspose) { 5682 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5683 } else { 5684 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5685 } 5686 } 5687 PetscFunctionReturn(0); 5688 } 5689 5690 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5691 { 5692 PetscErrorCode ierr; 5693 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5694 PetscScalar *array; 5695 Vec from,to; 5696 5697 PetscFunctionBegin; 5698 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5699 from = pcbddc->coarse_vec; 5700 to = pcbddc->vec1_P; 5701 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5702 Vec tvec; 5703 5704 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5705 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5706 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5707 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5708 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5709 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5710 } 5711 } else { /* from local to global -> put data in coarse right hand side */ 5712 from = pcbddc->vec1_P; 5713 to = pcbddc->coarse_vec; 5714 } 5715 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5716 PetscFunctionReturn(0); 5717 } 5718 5719 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5720 { 5721 PetscErrorCode ierr; 5722 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5723 PetscScalar *array; 5724 Vec from,to; 5725 5726 PetscFunctionBegin; 5727 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5728 from = pcbddc->coarse_vec; 5729 to = pcbddc->vec1_P; 5730 } else { /* from local to global -> put data in coarse right hand side */ 5731 from = pcbddc->vec1_P; 5732 to = pcbddc->coarse_vec; 5733 } 5734 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5735 if (smode == SCATTER_FORWARD) { 5736 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5737 Vec tvec; 5738 5739 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5740 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5741 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5742 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5743 } 5744 } else { 5745 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5746 ierr = VecResetArray(from);CHKERRQ(ierr); 5747 } 5748 } 5749 PetscFunctionReturn(0); 5750 } 5751 5752 /* uncomment for testing purposes */ 5753 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5754 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5755 { 5756 PetscErrorCode ierr; 5757 PC_IS* pcis = (PC_IS*)(pc->data); 5758 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5759 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5760 /* one and zero */ 5761 PetscScalar one=1.0,zero=0.0; 5762 /* space to store constraints and their local indices */ 5763 PetscScalar *constraints_data; 5764 PetscInt *constraints_idxs,*constraints_idxs_B; 5765 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5766 PetscInt *constraints_n; 5767 /* iterators */ 5768 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5769 /* BLAS integers */ 5770 PetscBLASInt lwork,lierr; 5771 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5772 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5773 /* reuse */ 5774 PetscInt olocal_primal_size,olocal_primal_size_cc; 5775 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5776 /* change of basis */ 5777 PetscBool qr_needed; 5778 PetscBT change_basis,qr_needed_idx; 5779 /* auxiliary stuff */ 5780 PetscInt *nnz,*is_indices; 5781 PetscInt ncc; 5782 /* some quantities */ 5783 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5784 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5785 PetscReal tol; /* tolerance for retaining eigenmodes */ 5786 5787 PetscFunctionBegin; 5788 tol = PetscSqrtReal(PETSC_SMALL); 5789 /* Destroy Mat objects computed previously */ 5790 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5791 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5792 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5793 /* save info on constraints from previous setup (if any) */ 5794 olocal_primal_size = pcbddc->local_primal_size; 5795 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5796 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5797 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5798 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5799 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5800 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5801 5802 if (!pcbddc->adaptive_selection) { 5803 IS ISForVertices,*ISForFaces,*ISForEdges; 5804 MatNullSpace nearnullsp; 5805 const Vec *nearnullvecs; 5806 Vec *localnearnullsp; 5807 PetscScalar *array; 5808 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5809 PetscBool nnsp_has_cnst; 5810 /* LAPACK working arrays for SVD or POD */ 5811 PetscBool skip_lapack,boolforchange; 5812 PetscScalar *work; 5813 PetscReal *singular_vals; 5814 #if defined(PETSC_USE_COMPLEX) 5815 PetscReal *rwork; 5816 #endif 5817 #if defined(PETSC_MISSING_LAPACK_GESVD) 5818 PetscScalar *temp_basis,*correlation_mat; 5819 #else 5820 PetscBLASInt dummy_int=1; 5821 PetscScalar dummy_scalar=1.; 5822 #endif 5823 5824 /* Get index sets for faces, edges and vertices from graph */ 5825 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5826 /* print some info */ 5827 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5828 PetscInt nv; 5829 5830 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5831 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5832 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5833 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5834 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5835 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5836 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5837 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5838 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5839 } 5840 5841 /* free unneeded index sets */ 5842 if (!pcbddc->use_vertices) { 5843 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5844 } 5845 if (!pcbddc->use_edges) { 5846 for (i=0;i<n_ISForEdges;i++) { 5847 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5848 } 5849 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5850 n_ISForEdges = 0; 5851 } 5852 if (!pcbddc->use_faces) { 5853 for (i=0;i<n_ISForFaces;i++) { 5854 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5855 } 5856 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5857 n_ISForFaces = 0; 5858 } 5859 5860 /* check if near null space is attached to global mat */ 5861 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5862 if (nearnullsp) { 5863 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5864 /* remove any stored info */ 5865 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5866 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5867 /* store information for BDDC solver reuse */ 5868 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5869 pcbddc->onearnullspace = nearnullsp; 5870 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5871 for (i=0;i<nnsp_size;i++) { 5872 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5873 } 5874 } else { /* if near null space is not provided BDDC uses constants by default */ 5875 nnsp_size = 0; 5876 nnsp_has_cnst = PETSC_TRUE; 5877 } 5878 /* get max number of constraints on a single cc */ 5879 max_constraints = nnsp_size; 5880 if (nnsp_has_cnst) max_constraints++; 5881 5882 /* 5883 Evaluate maximum storage size needed by the procedure 5884 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5885 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5886 There can be multiple constraints per connected component 5887 */ 5888 n_vertices = 0; 5889 if (ISForVertices) { 5890 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5891 } 5892 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5893 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5894 5895 total_counts = n_ISForFaces+n_ISForEdges; 5896 total_counts *= max_constraints; 5897 total_counts += n_vertices; 5898 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5899 5900 total_counts = 0; 5901 max_size_of_constraint = 0; 5902 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5903 IS used_is; 5904 if (i<n_ISForEdges) { 5905 used_is = ISForEdges[i]; 5906 } else { 5907 used_is = ISForFaces[i-n_ISForEdges]; 5908 } 5909 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5910 total_counts += j; 5911 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5912 } 5913 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5914 5915 /* get local part of global near null space vectors */ 5916 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5917 for (k=0;k<nnsp_size;k++) { 5918 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5919 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5920 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5921 } 5922 5923 /* whether or not to skip lapack calls */ 5924 skip_lapack = PETSC_TRUE; 5925 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5926 5927 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5928 if (!skip_lapack) { 5929 PetscScalar temp_work; 5930 5931 #if defined(PETSC_MISSING_LAPACK_GESVD) 5932 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5933 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5934 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5935 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5936 #if defined(PETSC_USE_COMPLEX) 5937 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5938 #endif 5939 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5940 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5941 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5942 lwork = -1; 5943 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5944 #if !defined(PETSC_USE_COMPLEX) 5945 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5946 #else 5947 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5948 #endif 5949 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5950 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5951 #else /* on missing GESVD */ 5952 /* SVD */ 5953 PetscInt max_n,min_n; 5954 max_n = max_size_of_constraint; 5955 min_n = max_constraints; 5956 if (max_size_of_constraint < max_constraints) { 5957 min_n = max_size_of_constraint; 5958 max_n = max_constraints; 5959 } 5960 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5961 #if defined(PETSC_USE_COMPLEX) 5962 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5963 #endif 5964 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5965 lwork = -1; 5966 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5967 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5968 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5969 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5970 #if !defined(PETSC_USE_COMPLEX) 5971 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5972 #else 5973 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5974 #endif 5975 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5976 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5977 #endif /* on missing GESVD */ 5978 /* Allocate optimal workspace */ 5979 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5980 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5981 } 5982 /* Now we can loop on constraining sets */ 5983 total_counts = 0; 5984 constraints_idxs_ptr[0] = 0; 5985 constraints_data_ptr[0] = 0; 5986 /* vertices */ 5987 if (n_vertices) { 5988 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5989 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5990 for (i=0;i<n_vertices;i++) { 5991 constraints_n[total_counts] = 1; 5992 constraints_data[total_counts] = 1.0; 5993 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5994 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5995 total_counts++; 5996 } 5997 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5998 n_vertices = total_counts; 5999 } 6000 6001 /* edges and faces */ 6002 total_counts_cc = total_counts; 6003 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6004 IS used_is; 6005 PetscBool idxs_copied = PETSC_FALSE; 6006 6007 if (ncc<n_ISForEdges) { 6008 used_is = ISForEdges[ncc]; 6009 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6010 } else { 6011 used_is = ISForFaces[ncc-n_ISForEdges]; 6012 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6013 } 6014 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6015 6016 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 6017 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6018 /* change of basis should not be performed on local periodic nodes */ 6019 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6020 if (nnsp_has_cnst) { 6021 PetscScalar quad_value; 6022 6023 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 6024 idxs_copied = PETSC_TRUE; 6025 6026 if (!pcbddc->use_nnsp_true) { 6027 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6028 } else { 6029 quad_value = 1.0; 6030 } 6031 for (j=0;j<size_of_constraint;j++) { 6032 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6033 } 6034 temp_constraints++; 6035 total_counts++; 6036 } 6037 for (k=0;k<nnsp_size;k++) { 6038 PetscReal real_value; 6039 PetscScalar *ptr_to_data; 6040 6041 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6042 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6043 for (j=0;j<size_of_constraint;j++) { 6044 ptr_to_data[j] = array[is_indices[j]]; 6045 } 6046 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6047 /* check if array is null on the connected component */ 6048 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6049 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6050 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6051 temp_constraints++; 6052 total_counts++; 6053 if (!idxs_copied) { 6054 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 6055 idxs_copied = PETSC_TRUE; 6056 } 6057 } 6058 } 6059 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6060 valid_constraints = temp_constraints; 6061 if (!pcbddc->use_nnsp_true && temp_constraints) { 6062 if (temp_constraints == 1) { /* just normalize the constraint */ 6063 PetscScalar norm,*ptr_to_data; 6064 6065 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6066 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6067 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6068 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6069 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6070 } else { /* perform SVD */ 6071 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6072 6073 #if defined(PETSC_MISSING_LAPACK_GESVD) 6074 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6075 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6076 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6077 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6078 from that computed using LAPACKgesvd 6079 -> This is due to a different computation of eigenvectors in LAPACKheev 6080 -> The quality of the POD-computed basis will be the same */ 6081 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 6082 /* Store upper triangular part of correlation matrix */ 6083 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6084 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6085 for (j=0;j<temp_constraints;j++) { 6086 for (k=0;k<j+1;k++) { 6087 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6088 } 6089 } 6090 /* compute eigenvalues and eigenvectors of correlation matrix */ 6091 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6092 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 6093 #if !defined(PETSC_USE_COMPLEX) 6094 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6095 #else 6096 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6097 #endif 6098 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6099 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6100 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6101 j = 0; 6102 while (j < temp_constraints && singular_vals[j] < tol) j++; 6103 total_counts = total_counts-j; 6104 valid_constraints = temp_constraints-j; 6105 /* scale and copy POD basis into used quadrature memory */ 6106 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6107 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6108 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 6109 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6110 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 6111 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6112 if (j<temp_constraints) { 6113 PetscInt ii; 6114 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6115 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6116 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6117 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6118 for (k=0;k<temp_constraints-j;k++) { 6119 for (ii=0;ii<size_of_constraint;ii++) { 6120 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6121 } 6122 } 6123 } 6124 #else /* on missing GESVD */ 6125 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6126 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6127 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6128 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6129 #if !defined(PETSC_USE_COMPLEX) 6130 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6131 #else 6132 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6133 #endif 6134 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6135 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6136 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6137 k = temp_constraints; 6138 if (k > size_of_constraint) k = size_of_constraint; 6139 j = 0; 6140 while (j < k && singular_vals[k-j-1] < tol) j++; 6141 valid_constraints = k-j; 6142 total_counts = total_counts-temp_constraints+valid_constraints; 6143 #endif /* on missing GESVD */ 6144 } 6145 } 6146 /* update pointers information */ 6147 if (valid_constraints) { 6148 constraints_n[total_counts_cc] = valid_constraints; 6149 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6150 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6151 /* set change_of_basis flag */ 6152 if (boolforchange) { 6153 PetscBTSet(change_basis,total_counts_cc); 6154 } 6155 total_counts_cc++; 6156 } 6157 } 6158 /* free workspace */ 6159 if (!skip_lapack) { 6160 ierr = PetscFree(work);CHKERRQ(ierr); 6161 #if defined(PETSC_USE_COMPLEX) 6162 ierr = PetscFree(rwork);CHKERRQ(ierr); 6163 #endif 6164 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 6165 #if defined(PETSC_MISSING_LAPACK_GESVD) 6166 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 6167 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 6168 #endif 6169 } 6170 for (k=0;k<nnsp_size;k++) { 6171 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 6172 } 6173 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 6174 /* free index sets of faces, edges and vertices */ 6175 for (i=0;i<n_ISForFaces;i++) { 6176 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6177 } 6178 if (n_ISForFaces) { 6179 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6180 } 6181 for (i=0;i<n_ISForEdges;i++) { 6182 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6183 } 6184 if (n_ISForEdges) { 6185 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6186 } 6187 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6188 } else { 6189 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6190 6191 total_counts = 0; 6192 n_vertices = 0; 6193 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6194 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6195 } 6196 max_constraints = 0; 6197 total_counts_cc = 0; 6198 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6199 total_counts += pcbddc->adaptive_constraints_n[i]; 6200 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6201 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6202 } 6203 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6204 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6205 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6206 constraints_data = pcbddc->adaptive_constraints_data; 6207 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6208 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 6209 total_counts_cc = 0; 6210 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6211 if (pcbddc->adaptive_constraints_n[i]) { 6212 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6213 } 6214 } 6215 #if 0 6216 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 6217 for (i=0;i<total_counts_cc;i++) { 6218 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 6219 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 6220 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 6221 printf(" %d",constraints_idxs[j]); 6222 } 6223 printf("\n"); 6224 printf("number of cc: %d\n",constraints_n[i]); 6225 } 6226 for (i=0;i<n_vertices;i++) { 6227 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 6228 } 6229 for (i=0;i<sub_schurs->n_subs;i++) { 6230 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 6231 } 6232 #endif 6233 6234 max_size_of_constraint = 0; 6235 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6236 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 6237 /* Change of basis */ 6238 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 6239 if (pcbddc->use_change_of_basis) { 6240 for (i=0;i<sub_schurs->n_subs;i++) { 6241 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6242 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 6243 } 6244 } 6245 } 6246 } 6247 pcbddc->local_primal_size = total_counts; 6248 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6249 6250 /* map constraints_idxs in boundary numbering */ 6251 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6252 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 6253 6254 /* Create constraint matrix */ 6255 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6256 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6257 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6258 6259 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6260 /* determine if a QR strategy is needed for change of basis */ 6261 qr_needed = PETSC_FALSE; 6262 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6263 total_primal_vertices=0; 6264 pcbddc->local_primal_size_cc = 0; 6265 for (i=0;i<total_counts_cc;i++) { 6266 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6267 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6268 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6269 pcbddc->local_primal_size_cc += 1; 6270 } else if (PetscBTLookup(change_basis,i)) { 6271 for (k=0;k<constraints_n[i];k++) { 6272 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6273 } 6274 pcbddc->local_primal_size_cc += constraints_n[i]; 6275 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6276 PetscBTSet(qr_needed_idx,i); 6277 qr_needed = PETSC_TRUE; 6278 } 6279 } else { 6280 pcbddc->local_primal_size_cc += 1; 6281 } 6282 } 6283 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6284 pcbddc->n_vertices = total_primal_vertices; 6285 /* permute indices in order to have a sorted set of vertices */ 6286 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6287 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6288 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 6289 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6290 6291 /* nonzero structure of constraint matrix */ 6292 /* and get reference dof for local constraints */ 6293 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6294 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6295 6296 j = total_primal_vertices; 6297 total_counts = total_primal_vertices; 6298 cum = total_primal_vertices; 6299 for (i=n_vertices;i<total_counts_cc;i++) { 6300 if (!PetscBTLookup(change_basis,i)) { 6301 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6302 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6303 cum++; 6304 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6305 for (k=0;k<constraints_n[i];k++) { 6306 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6307 nnz[j+k] = size_of_constraint; 6308 } 6309 j += constraints_n[i]; 6310 } 6311 } 6312 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6313 ierr = PetscFree(nnz);CHKERRQ(ierr); 6314 6315 /* set values in constraint matrix */ 6316 for (i=0;i<total_primal_vertices;i++) { 6317 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6318 } 6319 total_counts = total_primal_vertices; 6320 for (i=n_vertices;i<total_counts_cc;i++) { 6321 if (!PetscBTLookup(change_basis,i)) { 6322 PetscInt *cols; 6323 6324 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6325 cols = constraints_idxs+constraints_idxs_ptr[i]; 6326 for (k=0;k<constraints_n[i];k++) { 6327 PetscInt row = total_counts+k; 6328 PetscScalar *vals; 6329 6330 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6331 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6332 } 6333 total_counts += constraints_n[i]; 6334 } 6335 } 6336 /* assembling */ 6337 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6338 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6339 ierr = MatChop(pcbddc->ConstraintMatrix,PETSC_SMALL);CHKERRQ(ierr); 6340 ierr = MatSeqAIJCompress(pcbddc->ConstraintMatrix,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6341 ierr = MatViewFromOptions(pcbddc->ConstraintMatrix,NULL,"-pc_bddc_constraint_mat_view");CHKERRQ(ierr); 6342 6343 /* 6344 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6345 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 6346 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 6347 */ 6348 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6349 if (pcbddc->use_change_of_basis) { 6350 /* dual and primal dofs on a single cc */ 6351 PetscInt dual_dofs,primal_dofs; 6352 /* working stuff for GEQRF */ 6353 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 6354 PetscBLASInt lqr_work; 6355 /* working stuff for UNGQR */ 6356 PetscScalar *gqr_work,lgqr_work_t; 6357 PetscBLASInt lgqr_work; 6358 /* working stuff for TRTRS */ 6359 PetscScalar *trs_rhs; 6360 PetscBLASInt Blas_NRHS; 6361 /* pointers for values insertion into change of basis matrix */ 6362 PetscInt *start_rows,*start_cols; 6363 PetscScalar *start_vals; 6364 /* working stuff for values insertion */ 6365 PetscBT is_primal; 6366 PetscInt *aux_primal_numbering_B; 6367 /* matrix sizes */ 6368 PetscInt global_size,local_size; 6369 /* temporary change of basis */ 6370 Mat localChangeOfBasisMatrix; 6371 /* extra space for debugging */ 6372 PetscScalar *dbg_work; 6373 6374 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6375 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6376 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6377 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6378 /* nonzeros for local mat */ 6379 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6380 if (!pcbddc->benign_change || pcbddc->fake_change) { 6381 for (i=0;i<pcis->n;i++) nnz[i]=1; 6382 } else { 6383 const PetscInt *ii; 6384 PetscInt n; 6385 PetscBool flg_row; 6386 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6387 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6388 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6389 } 6390 for (i=n_vertices;i<total_counts_cc;i++) { 6391 if (PetscBTLookup(change_basis,i)) { 6392 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6393 if (PetscBTLookup(qr_needed_idx,i)) { 6394 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6395 } else { 6396 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6397 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6398 } 6399 } 6400 } 6401 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6402 ierr = PetscFree(nnz);CHKERRQ(ierr); 6403 /* Set interior change in the matrix */ 6404 if (!pcbddc->benign_change || pcbddc->fake_change) { 6405 for (i=0;i<pcis->n;i++) { 6406 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6407 } 6408 } else { 6409 const PetscInt *ii,*jj; 6410 PetscScalar *aa; 6411 PetscInt n; 6412 PetscBool flg_row; 6413 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6414 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6415 for (i=0;i<n;i++) { 6416 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6417 } 6418 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6419 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6420 } 6421 6422 if (pcbddc->dbg_flag) { 6423 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6424 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6425 } 6426 6427 6428 /* Now we loop on the constraints which need a change of basis */ 6429 /* 6430 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6431 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6432 6433 Basic blocks of change of basis matrix T computed by 6434 6435 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6436 6437 | 1 0 ... 0 s_1/S | 6438 | 0 1 ... 0 s_2/S | 6439 | ... | 6440 | 0 ... 1 s_{n-1}/S | 6441 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6442 6443 with S = \sum_{i=1}^n s_i^2 6444 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6445 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6446 6447 - QR decomposition of constraints otherwise 6448 */ 6449 if (qr_needed) { 6450 /* space to store Q */ 6451 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6452 /* array to store scaling factors for reflectors */ 6453 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6454 /* first we issue queries for optimal work */ 6455 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6456 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6457 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6458 lqr_work = -1; 6459 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6460 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6461 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6462 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6463 lgqr_work = -1; 6464 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6465 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6466 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6467 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6468 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6469 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6470 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6471 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6472 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6473 /* array to store rhs and solution of triangular solver */ 6474 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6475 /* allocating workspace for check */ 6476 if (pcbddc->dbg_flag) { 6477 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6478 } 6479 } 6480 /* array to store whether a node is primal or not */ 6481 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6482 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6483 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6484 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 6485 for (i=0;i<total_primal_vertices;i++) { 6486 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6487 } 6488 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6489 6490 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6491 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6492 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6493 if (PetscBTLookup(change_basis,total_counts)) { 6494 /* get constraint info */ 6495 primal_dofs = constraints_n[total_counts]; 6496 dual_dofs = size_of_constraint-primal_dofs; 6497 6498 if (pcbddc->dbg_flag) { 6499 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6500 } 6501 6502 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6503 6504 /* copy quadrature constraints for change of basis check */ 6505 if (pcbddc->dbg_flag) { 6506 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6507 } 6508 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6509 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6510 6511 /* compute QR decomposition of constraints */ 6512 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6513 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6514 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6515 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6516 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6517 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6518 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6519 6520 /* explictly compute R^-T */ 6521 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6522 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6523 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6524 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6525 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6526 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6527 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6528 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6529 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6530 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6531 6532 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6533 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6534 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6535 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6537 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6538 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6539 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6540 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6541 6542 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6543 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6544 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6545 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6546 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6547 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6548 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6549 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6550 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6551 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6552 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6553 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6554 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6555 6556 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6557 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6558 /* insert cols for primal dofs */ 6559 for (j=0;j<primal_dofs;j++) { 6560 start_vals = &qr_basis[j*size_of_constraint]; 6561 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6562 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6563 } 6564 /* insert cols for dual dofs */ 6565 for (j=0,k=0;j<dual_dofs;k++) { 6566 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6567 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6568 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6569 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6570 j++; 6571 } 6572 } 6573 6574 /* check change of basis */ 6575 if (pcbddc->dbg_flag) { 6576 PetscInt ii,jj; 6577 PetscBool valid_qr=PETSC_TRUE; 6578 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6579 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6580 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6581 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6582 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6583 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6584 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6585 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6586 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6587 for (jj=0;jj<size_of_constraint;jj++) { 6588 for (ii=0;ii<primal_dofs;ii++) { 6589 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6590 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6591 } 6592 } 6593 if (!valid_qr) { 6594 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6595 for (jj=0;jj<size_of_constraint;jj++) { 6596 for (ii=0;ii<primal_dofs;ii++) { 6597 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6598 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6599 } 6600 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6601 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6602 } 6603 } 6604 } 6605 } else { 6606 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6607 } 6608 } 6609 } else { /* simple transformation block */ 6610 PetscInt row,col; 6611 PetscScalar val,norm; 6612 6613 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6614 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6615 for (j=0;j<size_of_constraint;j++) { 6616 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6617 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6618 if (!PetscBTLookup(is_primal,row_B)) { 6619 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6620 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6621 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6622 } else { 6623 for (k=0;k<size_of_constraint;k++) { 6624 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6625 if (row != col) { 6626 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6627 } else { 6628 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6629 } 6630 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6631 } 6632 } 6633 } 6634 if (pcbddc->dbg_flag) { 6635 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6636 } 6637 } 6638 } else { 6639 if (pcbddc->dbg_flag) { 6640 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6641 } 6642 } 6643 } 6644 6645 /* free workspace */ 6646 if (qr_needed) { 6647 if (pcbddc->dbg_flag) { 6648 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6649 } 6650 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6651 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6652 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6653 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6654 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6655 } 6656 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6657 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6658 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6659 6660 /* assembling of global change of variable */ 6661 if (!pcbddc->fake_change) { 6662 Mat tmat; 6663 PetscInt bs; 6664 6665 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6666 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6667 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6668 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6669 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6670 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6671 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6672 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6673 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6674 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6675 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6676 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6677 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6678 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6679 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6680 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6681 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6682 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6683 6684 /* check */ 6685 if (pcbddc->dbg_flag) { 6686 PetscReal error; 6687 Vec x,x_change; 6688 6689 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6690 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6691 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6692 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6693 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6694 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6695 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6696 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6697 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6698 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6699 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6700 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6701 if (error > PETSC_SMALL) { 6702 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6703 } 6704 ierr = VecDestroy(&x);CHKERRQ(ierr); 6705 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6706 } 6707 /* adapt sub_schurs computed (if any) */ 6708 if (pcbddc->use_deluxe_scaling) { 6709 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6710 6711 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6712 if (sub_schurs && sub_schurs->S_Ej_all) { 6713 Mat S_new,tmat; 6714 IS is_all_N,is_V_Sall = NULL; 6715 6716 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6717 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6718 if (pcbddc->deluxe_zerorows) { 6719 ISLocalToGlobalMapping NtoSall; 6720 IS is_V; 6721 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6722 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6723 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6724 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6725 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6726 } 6727 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6728 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6729 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6730 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6731 if (pcbddc->deluxe_zerorows) { 6732 const PetscScalar *array; 6733 const PetscInt *idxs_V,*idxs_all; 6734 PetscInt i,n_V; 6735 6736 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6737 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6738 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6739 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6740 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6741 for (i=0;i<n_V;i++) { 6742 PetscScalar val; 6743 PetscInt idx; 6744 6745 idx = idxs_V[i]; 6746 val = array[idxs_all[idxs_V[i]]]; 6747 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6748 } 6749 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6750 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6751 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6752 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6753 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6754 } 6755 sub_schurs->S_Ej_all = S_new; 6756 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6757 if (sub_schurs->sum_S_Ej_all) { 6758 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6759 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6760 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6761 if (pcbddc->deluxe_zerorows) { 6762 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6763 } 6764 sub_schurs->sum_S_Ej_all = S_new; 6765 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6766 } 6767 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6768 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6769 } 6770 /* destroy any change of basis context in sub_schurs */ 6771 if (sub_schurs && sub_schurs->change) { 6772 PetscInt i; 6773 6774 for (i=0;i<sub_schurs->n_subs;i++) { 6775 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6776 } 6777 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6778 } 6779 } 6780 if (pcbddc->switch_static) { /* need to save the local change */ 6781 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6782 } else { 6783 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6784 } 6785 /* determine if any process has changed the pressures locally */ 6786 pcbddc->change_interior = pcbddc->benign_have_null; 6787 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6788 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6789 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6790 pcbddc->use_qr_single = qr_needed; 6791 } 6792 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6793 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6794 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6795 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6796 } else { 6797 Mat benign_global = NULL; 6798 if (pcbddc->benign_have_null) { 6799 Mat tmat; 6800 6801 pcbddc->change_interior = PETSC_TRUE; 6802 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6803 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6804 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6805 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6806 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6807 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6808 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6809 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6810 if (pcbddc->benign_change) { 6811 Mat M; 6812 6813 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6814 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6815 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6816 ierr = MatDestroy(&M);CHKERRQ(ierr); 6817 } else { 6818 Mat eye; 6819 PetscScalar *array; 6820 6821 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6822 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6823 for (i=0;i<pcis->n;i++) { 6824 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6825 } 6826 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6827 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6828 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6829 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6830 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6831 } 6832 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6833 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6834 } 6835 if (pcbddc->user_ChangeOfBasisMatrix) { 6836 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6837 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6838 } else if (pcbddc->benign_have_null) { 6839 pcbddc->ChangeOfBasisMatrix = benign_global; 6840 } 6841 } 6842 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6843 IS is_global; 6844 const PetscInt *gidxs; 6845 6846 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6847 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6848 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6849 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6850 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6851 } 6852 } 6853 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6854 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6855 } 6856 6857 if (!pcbddc->fake_change) { 6858 /* add pressure dofs to set of primal nodes for numbering purposes */ 6859 for (i=0;i<pcbddc->benign_n;i++) { 6860 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6861 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6862 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6863 pcbddc->local_primal_size_cc++; 6864 pcbddc->local_primal_size++; 6865 } 6866 6867 /* check if a new primal space has been introduced (also take into account benign trick) */ 6868 pcbddc->new_primal_space_local = PETSC_TRUE; 6869 if (olocal_primal_size == pcbddc->local_primal_size) { 6870 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6871 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6872 if (!pcbddc->new_primal_space_local) { 6873 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6874 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6875 } 6876 } 6877 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6878 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6879 } 6880 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6881 6882 /* flush dbg viewer */ 6883 if (pcbddc->dbg_flag) { 6884 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6885 } 6886 6887 /* free workspace */ 6888 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6889 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6890 if (!pcbddc->adaptive_selection) { 6891 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6892 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6893 } else { 6894 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6895 pcbddc->adaptive_constraints_idxs_ptr, 6896 pcbddc->adaptive_constraints_data_ptr, 6897 pcbddc->adaptive_constraints_idxs, 6898 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6899 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6900 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6901 } 6902 PetscFunctionReturn(0); 6903 } 6904 /* #undef PETSC_MISSING_LAPACK_GESVD */ 6905 6906 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6907 { 6908 ISLocalToGlobalMapping map; 6909 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6910 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6911 PetscInt i,N; 6912 PetscBool rcsr = PETSC_FALSE; 6913 PetscErrorCode ierr; 6914 6915 PetscFunctionBegin; 6916 if (pcbddc->recompute_topography) { 6917 pcbddc->graphanalyzed = PETSC_FALSE; 6918 /* Reset previously computed graph */ 6919 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6920 /* Init local Graph struct */ 6921 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6922 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6923 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6924 6925 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6926 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6927 } 6928 /* Check validity of the csr graph passed in by the user */ 6929 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6930 6931 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6932 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6933 PetscInt *xadj,*adjncy; 6934 PetscInt nvtxs; 6935 PetscBool flg_row=PETSC_FALSE; 6936 6937 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6938 if (flg_row) { 6939 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6940 pcbddc->computed_rowadj = PETSC_TRUE; 6941 } 6942 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6943 rcsr = PETSC_TRUE; 6944 } 6945 if (pcbddc->dbg_flag) { 6946 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6947 } 6948 6949 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 6950 PetscReal *lcoords; 6951 PetscInt n; 6952 MPI_Datatype dimrealtype; 6953 6954 if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 6955 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 6956 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 6957 ierr = PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);CHKERRQ(ierr); 6958 ierr = MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);CHKERRQ(ierr); 6959 ierr = MPI_Type_commit(&dimrealtype);CHKERRQ(ierr); 6960 ierr = PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 6961 ierr = PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 6962 ierr = MPI_Type_free(&dimrealtype);CHKERRQ(ierr); 6963 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 6964 6965 pcbddc->mat_graph->coords = lcoords; 6966 pcbddc->mat_graph->cloc = PETSC_TRUE; 6967 pcbddc->mat_graph->cnloc = n; 6968 } 6969 if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 6970 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected); 6971 6972 /* Setup of Graph */ 6973 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6974 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6975 6976 /* attach info on disconnected subdomains if present */ 6977 if (pcbddc->n_local_subs) { 6978 PetscInt *local_subs; 6979 6980 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6981 for (i=0;i<pcbddc->n_local_subs;i++) { 6982 const PetscInt *idxs; 6983 PetscInt nl,j; 6984 6985 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6986 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6987 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6988 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6989 } 6990 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6991 pcbddc->mat_graph->local_subs = local_subs; 6992 } 6993 } 6994 6995 if (!pcbddc->graphanalyzed) { 6996 /* Graph's connected components analysis */ 6997 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6998 pcbddc->graphanalyzed = PETSC_TRUE; 6999 } 7000 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7001 PetscFunctionReturn(0); 7002 } 7003 7004 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 7005 { 7006 PetscInt i,j; 7007 PetscScalar *alphas; 7008 PetscErrorCode ierr; 7009 7010 PetscFunctionBegin; 7011 if (!n) PetscFunctionReturn(0); 7012 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 7013 ierr = VecNormalize(vecs[0],NULL);CHKERRQ(ierr); 7014 for (i=1;i<n;i++) { 7015 ierr = VecMDot(vecs[i],i,vecs,alphas);CHKERRQ(ierr); 7016 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 7017 ierr = VecMAXPY(vecs[i],i,alphas,vecs);CHKERRQ(ierr); 7018 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 7019 } 7020 ierr = PetscFree(alphas);CHKERRQ(ierr); 7021 PetscFunctionReturn(0); 7022 } 7023 7024 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7025 { 7026 Mat A; 7027 PetscInt n_neighs,*neighs,*n_shared,**shared; 7028 PetscMPIInt size,rank,color; 7029 PetscInt *xadj,*adjncy; 7030 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7031 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7032 PetscInt void_procs,*procs_candidates = NULL; 7033 PetscInt xadj_count,*count; 7034 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7035 PetscSubcomm psubcomm; 7036 MPI_Comm subcomm; 7037 PetscErrorCode ierr; 7038 7039 PetscFunctionBegin; 7040 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7041 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7042 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7043 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7044 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7045 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 7046 7047 if (have_void) *have_void = PETSC_FALSE; 7048 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 7049 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 7050 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 7051 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 7052 im_active = !!n; 7053 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7054 void_procs = size - active_procs; 7055 /* get ranks of of non-active processes in mat communicator */ 7056 if (void_procs) { 7057 PetscInt ncand; 7058 7059 if (have_void) *have_void = PETSC_TRUE; 7060 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 7061 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7062 for (i=0,ncand=0;i<size;i++) { 7063 if (!procs_candidates[i]) { 7064 procs_candidates[ncand++] = i; 7065 } 7066 } 7067 /* force n_subdomains to be not greater that the number of non-active processes */ 7068 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7069 } 7070 7071 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7072 number of subdomains requested 1 -> send to master or first candidate in voids */ 7073 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 7074 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7075 PetscInt issize,isidx,dest; 7076 if (*n_subdomains == 1) dest = 0; 7077 else dest = rank; 7078 if (im_active) { 7079 issize = 1; 7080 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7081 isidx = procs_candidates[dest]; 7082 } else { 7083 isidx = dest; 7084 } 7085 } else { 7086 issize = 0; 7087 isidx = -1; 7088 } 7089 if (*n_subdomains != 1) *n_subdomains = active_procs; 7090 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 7091 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7092 PetscFunctionReturn(0); 7093 } 7094 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 7095 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 7096 threshold = PetscMax(threshold,2); 7097 7098 /* Get info on mapping */ 7099 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7100 7101 /* build local CSR graph of subdomains' connectivity */ 7102 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 7103 xadj[0] = 0; 7104 xadj[1] = PetscMax(n_neighs-1,0); 7105 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 7106 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 7107 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 7108 for (i=1;i<n_neighs;i++) 7109 for (j=0;j<n_shared[i];j++) 7110 count[shared[i][j]] += 1; 7111 7112 xadj_count = 0; 7113 for (i=1;i<n_neighs;i++) { 7114 for (j=0;j<n_shared[i];j++) { 7115 if (count[shared[i][j]] < threshold) { 7116 adjncy[xadj_count] = neighs[i]; 7117 adjncy_wgt[xadj_count] = n_shared[i]; 7118 xadj_count++; 7119 break; 7120 } 7121 } 7122 } 7123 xadj[1] = xadj_count; 7124 ierr = PetscFree(count);CHKERRQ(ierr); 7125 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7126 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7127 7128 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 7129 7130 /* Restrict work on active processes only */ 7131 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 7132 if (void_procs) { 7133 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 7134 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 7135 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 7136 subcomm = PetscSubcommChild(psubcomm); 7137 } else { 7138 psubcomm = NULL; 7139 subcomm = PetscObjectComm((PetscObject)mat); 7140 } 7141 7142 v_wgt = NULL; 7143 if (!color) { 7144 ierr = PetscFree(xadj);CHKERRQ(ierr); 7145 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7146 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7147 } else { 7148 Mat subdomain_adj; 7149 IS new_ranks,new_ranks_contig; 7150 MatPartitioning partitioner; 7151 PetscInt rstart=0,rend=0; 7152 PetscInt *is_indices,*oldranks; 7153 PetscMPIInt size; 7154 PetscBool aggregate; 7155 7156 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 7157 if (void_procs) { 7158 PetscInt prank = rank; 7159 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 7160 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 7161 for (i=0;i<xadj[1];i++) { 7162 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 7163 } 7164 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7165 } else { 7166 oldranks = NULL; 7167 } 7168 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7169 if (aggregate) { /* TODO: all this part could be made more efficient */ 7170 PetscInt lrows,row,ncols,*cols; 7171 PetscMPIInt nrank; 7172 PetscScalar *vals; 7173 7174 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 7175 lrows = 0; 7176 if (nrank<redprocs) { 7177 lrows = size/redprocs; 7178 if (nrank<size%redprocs) lrows++; 7179 } 7180 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 7181 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 7182 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7183 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7184 row = nrank; 7185 ncols = xadj[1]-xadj[0]; 7186 cols = adjncy; 7187 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 7188 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7189 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 7190 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7191 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7192 ierr = PetscFree(xadj);CHKERRQ(ierr); 7193 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7194 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7195 ierr = PetscFree(vals);CHKERRQ(ierr); 7196 if (use_vwgt) { 7197 Vec v; 7198 const PetscScalar *array; 7199 PetscInt nl; 7200 7201 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 7202 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 7203 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 7204 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 7205 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 7206 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 7207 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 7208 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7209 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 7210 ierr = VecDestroy(&v);CHKERRQ(ierr); 7211 } 7212 } else { 7213 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 7214 if (use_vwgt) { 7215 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 7216 v_wgt[0] = n; 7217 } 7218 } 7219 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 7220 7221 /* Partition */ 7222 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 7223 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 7224 if (v_wgt) { 7225 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 7226 } 7227 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7228 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 7229 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 7230 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 7231 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 7232 7233 /* renumber new_ranks to avoid "holes" in new set of processors */ 7234 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 7235 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 7236 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7237 if (!aggregate) { 7238 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7239 #if defined(PETSC_USE_DEBUG) 7240 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7241 #endif 7242 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7243 } else if (oldranks) { 7244 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7245 } else { 7246 ranks_send_to_idx[0] = is_indices[0]; 7247 } 7248 } else { 7249 PetscInt idx = 0; 7250 PetscMPIInt tag; 7251 MPI_Request *reqs; 7252 7253 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 7254 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 7255 for (i=rstart;i<rend;i++) { 7256 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 7257 } 7258 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 7259 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7260 ierr = PetscFree(reqs);CHKERRQ(ierr); 7261 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7262 #if defined(PETSC_USE_DEBUG) 7263 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7264 #endif 7265 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7266 } else if (oldranks) { 7267 ranks_send_to_idx[0] = oldranks[idx]; 7268 } else { 7269 ranks_send_to_idx[0] = idx; 7270 } 7271 } 7272 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7273 /* clean up */ 7274 ierr = PetscFree(oldranks);CHKERRQ(ierr); 7275 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 7276 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 7277 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7278 } 7279 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7280 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7281 7282 /* assemble parallel IS for sends */ 7283 i = 1; 7284 if (!color) i=0; 7285 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7286 PetscFunctionReturn(0); 7287 } 7288 7289 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7290 7291 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7292 { 7293 Mat local_mat; 7294 IS is_sends_internal; 7295 PetscInt rows,cols,new_local_rows; 7296 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7297 PetscBool ismatis,isdense,newisdense,destroy_mat; 7298 ISLocalToGlobalMapping l2gmap; 7299 PetscInt* l2gmap_indices; 7300 const PetscInt* is_indices; 7301 MatType new_local_type; 7302 /* buffers */ 7303 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7304 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7305 PetscInt *recv_buffer_idxs_local; 7306 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 7307 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7308 /* MPI */ 7309 MPI_Comm comm,comm_n; 7310 PetscSubcomm subcomm; 7311 PetscMPIInt n_sends,n_recvs,commsize; 7312 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7313 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7314 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7315 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7316 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7317 PetscErrorCode ierr; 7318 7319 PetscFunctionBegin; 7320 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7321 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7322 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7323 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7324 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7325 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7326 PetscValidLogicalCollectiveBool(mat,reuse,6); 7327 PetscValidLogicalCollectiveInt(mat,nis,8); 7328 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7329 if (nvecs) { 7330 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7331 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7332 } 7333 /* further checks */ 7334 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7335 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7336 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7337 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7338 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7339 if (reuse && *mat_n) { 7340 PetscInt mrows,mcols,mnrows,mncols; 7341 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7342 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7343 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7344 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7345 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7346 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7347 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7348 } 7349 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7350 PetscValidLogicalCollectiveInt(mat,bs,0); 7351 7352 /* prepare IS for sending if not provided */ 7353 if (!is_sends) { 7354 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7355 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7356 } else { 7357 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7358 is_sends_internal = is_sends; 7359 } 7360 7361 /* get comm */ 7362 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7363 7364 /* compute number of sends */ 7365 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7366 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7367 7368 /* compute number of receives */ 7369 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 7370 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 7371 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 7372 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7373 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7374 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7375 ierr = PetscFree(iflags);CHKERRQ(ierr); 7376 7377 /* restrict comm if requested */ 7378 subcomm = 0; 7379 destroy_mat = PETSC_FALSE; 7380 if (restrict_comm) { 7381 PetscMPIInt color,subcommsize; 7382 7383 color = 0; 7384 if (restrict_full) { 7385 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7386 } else { 7387 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7388 } 7389 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7390 subcommsize = commsize - subcommsize; 7391 /* check if reuse has been requested */ 7392 if (reuse) { 7393 if (*mat_n) { 7394 PetscMPIInt subcommsize2; 7395 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7396 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7397 comm_n = PetscObjectComm((PetscObject)*mat_n); 7398 } else { 7399 comm_n = PETSC_COMM_SELF; 7400 } 7401 } else { /* MAT_INITIAL_MATRIX */ 7402 PetscMPIInt rank; 7403 7404 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7405 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7406 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7407 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7408 comm_n = PetscSubcommChild(subcomm); 7409 } 7410 /* flag to destroy *mat_n if not significative */ 7411 if (color) destroy_mat = PETSC_TRUE; 7412 } else { 7413 comm_n = comm; 7414 } 7415 7416 /* prepare send/receive buffers */ 7417 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 7418 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 7419 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 7420 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 7421 if (nis) { 7422 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 7423 } 7424 7425 /* Get data from local matrices */ 7426 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7427 /* TODO: See below some guidelines on how to prepare the local buffers */ 7428 /* 7429 send_buffer_vals should contain the raw values of the local matrix 7430 send_buffer_idxs should contain: 7431 - MatType_PRIVATE type 7432 - PetscInt size_of_l2gmap 7433 - PetscInt global_row_indices[size_of_l2gmap] 7434 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7435 */ 7436 else { 7437 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7438 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7439 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7440 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7441 send_buffer_idxs[1] = i; 7442 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7443 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 7444 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7445 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7446 for (i=0;i<n_sends;i++) { 7447 ilengths_vals[is_indices[i]] = len*len; 7448 ilengths_idxs[is_indices[i]] = len+2; 7449 } 7450 } 7451 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7452 /* additional is (if any) */ 7453 if (nis) { 7454 PetscMPIInt psum; 7455 PetscInt j; 7456 for (j=0,psum=0;j<nis;j++) { 7457 PetscInt plen; 7458 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7459 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7460 psum += len+1; /* indices + lenght */ 7461 } 7462 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7463 for (j=0,psum=0;j<nis;j++) { 7464 PetscInt plen; 7465 const PetscInt *is_array_idxs; 7466 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7467 send_buffer_idxs_is[psum] = plen; 7468 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7469 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 7470 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7471 psum += plen+1; /* indices + lenght */ 7472 } 7473 for (i=0;i<n_sends;i++) { 7474 ilengths_idxs_is[is_indices[i]] = psum; 7475 } 7476 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7477 } 7478 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7479 7480 buf_size_idxs = 0; 7481 buf_size_vals = 0; 7482 buf_size_idxs_is = 0; 7483 buf_size_vecs = 0; 7484 for (i=0;i<n_recvs;i++) { 7485 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7486 buf_size_vals += (PetscInt)olengths_vals[i]; 7487 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7488 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7489 } 7490 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7491 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7492 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7493 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7494 7495 /* get new tags for clean communications */ 7496 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7497 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7498 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7499 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7500 7501 /* allocate for requests */ 7502 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7503 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7504 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7505 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7506 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7507 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7508 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7509 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7510 7511 /* communications */ 7512 ptr_idxs = recv_buffer_idxs; 7513 ptr_vals = recv_buffer_vals; 7514 ptr_idxs_is = recv_buffer_idxs_is; 7515 ptr_vecs = recv_buffer_vecs; 7516 for (i=0;i<n_recvs;i++) { 7517 source_dest = onodes[i]; 7518 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7519 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7520 ptr_idxs += olengths_idxs[i]; 7521 ptr_vals += olengths_vals[i]; 7522 if (nis) { 7523 source_dest = onodes_is[i]; 7524 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7525 ptr_idxs_is += olengths_idxs_is[i]; 7526 } 7527 if (nvecs) { 7528 source_dest = onodes[i]; 7529 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7530 ptr_vecs += olengths_idxs[i]-2; 7531 } 7532 } 7533 for (i=0;i<n_sends;i++) { 7534 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7535 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7536 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7537 if (nis) { 7538 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7539 } 7540 if (nvecs) { 7541 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7542 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7543 } 7544 } 7545 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7546 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7547 7548 /* assemble new l2g map */ 7549 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7550 ptr_idxs = recv_buffer_idxs; 7551 new_local_rows = 0; 7552 for (i=0;i<n_recvs;i++) { 7553 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7554 ptr_idxs += olengths_idxs[i]; 7555 } 7556 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7557 ptr_idxs = recv_buffer_idxs; 7558 new_local_rows = 0; 7559 for (i=0;i<n_recvs;i++) { 7560 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7561 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7562 ptr_idxs += olengths_idxs[i]; 7563 } 7564 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7565 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7566 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7567 7568 /* infer new local matrix type from received local matrices type */ 7569 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7570 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7571 if (n_recvs) { 7572 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7573 ptr_idxs = recv_buffer_idxs; 7574 for (i=0;i<n_recvs;i++) { 7575 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7576 new_local_type_private = MATAIJ_PRIVATE; 7577 break; 7578 } 7579 ptr_idxs += olengths_idxs[i]; 7580 } 7581 switch (new_local_type_private) { 7582 case MATDENSE_PRIVATE: 7583 new_local_type = MATSEQAIJ; 7584 bs = 1; 7585 break; 7586 case MATAIJ_PRIVATE: 7587 new_local_type = MATSEQAIJ; 7588 bs = 1; 7589 break; 7590 case MATBAIJ_PRIVATE: 7591 new_local_type = MATSEQBAIJ; 7592 break; 7593 case MATSBAIJ_PRIVATE: 7594 new_local_type = MATSEQSBAIJ; 7595 break; 7596 default: 7597 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7598 break; 7599 } 7600 } else { /* by default, new_local_type is seqaij */ 7601 new_local_type = MATSEQAIJ; 7602 bs = 1; 7603 } 7604 7605 /* create MATIS object if needed */ 7606 if (!reuse) { 7607 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7608 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7609 } else { 7610 /* it also destroys the local matrices */ 7611 if (*mat_n) { 7612 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7613 } else { /* this is a fake object */ 7614 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7615 } 7616 } 7617 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7618 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7619 7620 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7621 7622 /* Global to local map of received indices */ 7623 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7624 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7625 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7626 7627 /* restore attributes -> type of incoming data and its size */ 7628 buf_size_idxs = 0; 7629 for (i=0;i<n_recvs;i++) { 7630 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7631 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7632 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7633 } 7634 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7635 7636 /* set preallocation */ 7637 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7638 if (!newisdense) { 7639 PetscInt *new_local_nnz=0; 7640 7641 ptr_idxs = recv_buffer_idxs_local; 7642 if (n_recvs) { 7643 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7644 } 7645 for (i=0;i<n_recvs;i++) { 7646 PetscInt j; 7647 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7648 for (j=0;j<*(ptr_idxs+1);j++) { 7649 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7650 } 7651 } else { 7652 /* TODO */ 7653 } 7654 ptr_idxs += olengths_idxs[i]; 7655 } 7656 if (new_local_nnz) { 7657 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7658 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7659 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7660 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7661 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7662 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7663 } else { 7664 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7665 } 7666 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7667 } else { 7668 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7669 } 7670 7671 /* set values */ 7672 ptr_vals = recv_buffer_vals; 7673 ptr_idxs = recv_buffer_idxs_local; 7674 for (i=0;i<n_recvs;i++) { 7675 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7676 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7677 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7678 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7679 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7680 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7681 } else { 7682 /* TODO */ 7683 } 7684 ptr_idxs += olengths_idxs[i]; 7685 ptr_vals += olengths_vals[i]; 7686 } 7687 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7688 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7689 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7690 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7691 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7692 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7693 7694 #if 0 7695 if (!restrict_comm) { /* check */ 7696 Vec lvec,rvec; 7697 PetscReal infty_error; 7698 7699 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7700 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7701 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7702 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7703 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7704 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7705 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7706 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7707 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7708 } 7709 #endif 7710 7711 /* assemble new additional is (if any) */ 7712 if (nis) { 7713 PetscInt **temp_idxs,*count_is,j,psum; 7714 7715 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7716 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7717 ptr_idxs = recv_buffer_idxs_is; 7718 psum = 0; 7719 for (i=0;i<n_recvs;i++) { 7720 for (j=0;j<nis;j++) { 7721 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7722 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7723 psum += plen; 7724 ptr_idxs += plen+1; /* shift pointer to received data */ 7725 } 7726 } 7727 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7728 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7729 for (i=1;i<nis;i++) { 7730 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7731 } 7732 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7733 ptr_idxs = recv_buffer_idxs_is; 7734 for (i=0;i<n_recvs;i++) { 7735 for (j=0;j<nis;j++) { 7736 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7737 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7738 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7739 ptr_idxs += plen+1; /* shift pointer to received data */ 7740 } 7741 } 7742 for (i=0;i<nis;i++) { 7743 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7744 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7745 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7746 } 7747 ierr = PetscFree(count_is);CHKERRQ(ierr); 7748 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7749 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7750 } 7751 /* free workspace */ 7752 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7753 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7754 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7755 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7756 if (isdense) { 7757 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7758 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7759 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7760 } else { 7761 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7762 } 7763 if (nis) { 7764 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7765 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7766 } 7767 7768 if (nvecs) { 7769 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7770 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7771 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7772 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7773 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7774 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7775 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7776 /* set values */ 7777 ptr_vals = recv_buffer_vecs; 7778 ptr_idxs = recv_buffer_idxs_local; 7779 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7780 for (i=0;i<n_recvs;i++) { 7781 PetscInt j; 7782 for (j=0;j<*(ptr_idxs+1);j++) { 7783 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7784 } 7785 ptr_idxs += olengths_idxs[i]; 7786 ptr_vals += olengths_idxs[i]-2; 7787 } 7788 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7789 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7790 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7791 } 7792 7793 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7794 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7795 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7796 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7797 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7798 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7799 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7800 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7801 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7802 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7803 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7804 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7805 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7806 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7807 ierr = PetscFree(onodes);CHKERRQ(ierr); 7808 if (nis) { 7809 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7810 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7811 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7812 } 7813 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7814 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7815 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7816 for (i=0;i<nis;i++) { 7817 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7818 } 7819 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7820 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7821 } 7822 *mat_n = NULL; 7823 } 7824 PetscFunctionReturn(0); 7825 } 7826 7827 /* temporary hack into ksp private data structure */ 7828 #include <petsc/private/kspimpl.h> 7829 7830 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7831 { 7832 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7833 PC_IS *pcis = (PC_IS*)pc->data; 7834 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7835 Mat coarsedivudotp = NULL; 7836 Mat coarseG,t_coarse_mat_is; 7837 MatNullSpace CoarseNullSpace = NULL; 7838 ISLocalToGlobalMapping coarse_islg; 7839 IS coarse_is,*isarray; 7840 PetscInt i,im_active=-1,active_procs=-1; 7841 PetscInt nis,nisdofs,nisneu,nisvert; 7842 PC pc_temp; 7843 PCType coarse_pc_type; 7844 KSPType coarse_ksp_type; 7845 PetscBool multilevel_requested,multilevel_allowed; 7846 PetscBool coarse_reuse; 7847 PetscInt ncoarse,nedcfield; 7848 PetscBool compute_vecs = PETSC_FALSE; 7849 PetscScalar *array; 7850 MatReuse coarse_mat_reuse; 7851 PetscBool restr, full_restr, have_void; 7852 PetscMPIInt commsize; 7853 PetscErrorCode ierr; 7854 7855 PetscFunctionBegin; 7856 /* Assign global numbering to coarse dofs */ 7857 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7858 PetscInt ocoarse_size; 7859 compute_vecs = PETSC_TRUE; 7860 7861 pcbddc->new_primal_space = PETSC_TRUE; 7862 ocoarse_size = pcbddc->coarse_size; 7863 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7864 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7865 /* see if we can avoid some work */ 7866 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7867 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7868 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7869 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7870 coarse_reuse = PETSC_FALSE; 7871 } else { /* we can safely reuse already computed coarse matrix */ 7872 coarse_reuse = PETSC_TRUE; 7873 } 7874 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7875 coarse_reuse = PETSC_FALSE; 7876 } 7877 /* reset any subassembling information */ 7878 if (!coarse_reuse || pcbddc->recompute_topography) { 7879 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7880 } 7881 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7882 coarse_reuse = PETSC_TRUE; 7883 } 7884 /* assemble coarse matrix */ 7885 if (coarse_reuse && pcbddc->coarse_ksp) { 7886 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7887 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7888 coarse_mat_reuse = MAT_REUSE_MATRIX; 7889 } else { 7890 coarse_mat = NULL; 7891 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7892 } 7893 7894 /* creates temporary l2gmap and IS for coarse indexes */ 7895 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7896 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7897 7898 /* creates temporary MATIS object for coarse matrix */ 7899 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7900 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7901 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7902 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7903 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7904 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7905 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7906 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7907 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7908 7909 /* count "active" (i.e. with positive local size) and "void" processes */ 7910 im_active = !!(pcis->n); 7911 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7912 7913 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7914 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7915 /* full_restr : just use the receivers from the subassembling pattern */ 7916 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7917 coarse_mat_is = NULL; 7918 multilevel_allowed = PETSC_FALSE; 7919 multilevel_requested = PETSC_FALSE; 7920 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7921 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7922 if (multilevel_requested) { 7923 ncoarse = active_procs/pcbddc->coarsening_ratio; 7924 restr = PETSC_FALSE; 7925 full_restr = PETSC_FALSE; 7926 } else { 7927 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7928 restr = PETSC_TRUE; 7929 full_restr = PETSC_TRUE; 7930 } 7931 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7932 ncoarse = PetscMax(1,ncoarse); 7933 if (!pcbddc->coarse_subassembling) { 7934 if (pcbddc->coarsening_ratio > 1) { 7935 if (multilevel_requested) { 7936 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7937 } else { 7938 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7939 } 7940 } else { 7941 PetscMPIInt rank; 7942 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7943 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7944 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7945 } 7946 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7947 PetscInt psum; 7948 if (pcbddc->coarse_ksp) psum = 1; 7949 else psum = 0; 7950 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7951 if (ncoarse < commsize) have_void = PETSC_TRUE; 7952 } 7953 /* determine if we can go multilevel */ 7954 if (multilevel_requested) { 7955 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7956 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7957 } 7958 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7959 7960 /* dump subassembling pattern */ 7961 if (pcbddc->dbg_flag && multilevel_allowed) { 7962 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7963 } 7964 7965 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7966 nedcfield = -1; 7967 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7968 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7969 const PetscInt *idxs; 7970 ISLocalToGlobalMapping tmap; 7971 7972 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7973 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7974 /* allocate space for temporary storage */ 7975 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7976 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7977 /* allocate for IS array */ 7978 nisdofs = pcbddc->n_ISForDofsLocal; 7979 if (pcbddc->nedclocal) { 7980 if (pcbddc->nedfield > -1) { 7981 nedcfield = pcbddc->nedfield; 7982 } else { 7983 nedcfield = 0; 7984 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7985 nisdofs = 1; 7986 } 7987 } 7988 nisneu = !!pcbddc->NeumannBoundariesLocal; 7989 nisvert = 0; /* nisvert is not used */ 7990 nis = nisdofs + nisneu + nisvert; 7991 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7992 /* dofs splitting */ 7993 for (i=0;i<nisdofs;i++) { 7994 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7995 if (nedcfield != i) { 7996 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7997 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7998 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7999 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8000 } else { 8001 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 8002 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8003 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8004 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 8005 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8006 } 8007 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8008 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 8009 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 8010 } 8011 /* neumann boundaries */ 8012 if (pcbddc->NeumannBoundariesLocal) { 8013 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 8014 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 8015 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8016 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8017 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8018 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8019 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 8020 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 8021 } 8022 /* free memory */ 8023 ierr = PetscFree(tidxs);CHKERRQ(ierr); 8024 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 8025 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 8026 } else { 8027 nis = 0; 8028 nisdofs = 0; 8029 nisneu = 0; 8030 nisvert = 0; 8031 isarray = NULL; 8032 } 8033 /* destroy no longer needed map */ 8034 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 8035 8036 /* subassemble */ 8037 if (multilevel_allowed) { 8038 Vec vp[1]; 8039 PetscInt nvecs = 0; 8040 PetscBool reuse,reuser; 8041 8042 if (coarse_mat) reuse = PETSC_TRUE; 8043 else reuse = PETSC_FALSE; 8044 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8045 vp[0] = NULL; 8046 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8047 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 8048 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 8049 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 8050 nvecs = 1; 8051 8052 if (pcbddc->divudotp) { 8053 Mat B,loc_divudotp; 8054 Vec v,p; 8055 IS dummy; 8056 PetscInt np; 8057 8058 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 8059 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 8060 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 8061 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 8062 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 8063 ierr = VecSet(p,1.);CHKERRQ(ierr); 8064 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 8065 ierr = VecDestroy(&p);CHKERRQ(ierr); 8066 ierr = MatDestroy(&B);CHKERRQ(ierr); 8067 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 8068 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 8069 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 8070 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 8071 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 8072 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8073 ierr = VecDestroy(&v);CHKERRQ(ierr); 8074 } 8075 } 8076 if (reuser) { 8077 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8078 } else { 8079 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8080 } 8081 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8082 PetscScalar *arraym,*arrayv; 8083 PetscInt nl; 8084 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 8085 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 8086 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8087 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 8088 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 8089 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 8090 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8091 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 8092 } else { 8093 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 8094 } 8095 } else { 8096 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 8097 } 8098 if (coarse_mat_is || coarse_mat) { 8099 PetscMPIInt size; 8100 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 8101 if (!multilevel_allowed) { 8102 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 8103 } else { 8104 Mat A; 8105 8106 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8107 if (coarse_mat_is) { 8108 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8109 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 8110 coarse_mat = coarse_mat_is; 8111 } 8112 /* be sure we don't have MatSeqDENSE as local mat */ 8113 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 8114 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 8115 } 8116 } 8117 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 8118 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 8119 8120 /* create local to global scatters for coarse problem */ 8121 if (compute_vecs) { 8122 PetscInt lrows; 8123 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 8124 if (coarse_mat) { 8125 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 8126 } else { 8127 lrows = 0; 8128 } 8129 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 8130 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 8131 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 8132 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8133 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8134 } 8135 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 8136 8137 /* set defaults for coarse KSP and PC */ 8138 if (multilevel_allowed) { 8139 coarse_ksp_type = KSPRICHARDSON; 8140 coarse_pc_type = PCBDDC; 8141 } else { 8142 coarse_ksp_type = KSPPREONLY; 8143 coarse_pc_type = PCREDUNDANT; 8144 } 8145 8146 /* print some info if requested */ 8147 if (pcbddc->dbg_flag) { 8148 if (!multilevel_allowed) { 8149 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8150 if (multilevel_requested) { 8151 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8152 } else if (pcbddc->max_levels) { 8153 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 8154 } 8155 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8156 } 8157 } 8158 8159 /* communicate coarse discrete gradient */ 8160 coarseG = NULL; 8161 if (pcbddc->nedcG && multilevel_allowed) { 8162 MPI_Comm ccomm; 8163 if (coarse_mat) { 8164 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8165 } else { 8166 ccomm = MPI_COMM_NULL; 8167 } 8168 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 8169 } 8170 8171 /* create the coarse KSP object only once with defaults */ 8172 if (coarse_mat) { 8173 PetscBool isredundant,isnn,isbddc; 8174 PetscViewer dbg_viewer = NULL; 8175 8176 if (pcbddc->dbg_flag) { 8177 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8178 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8179 } 8180 if (!pcbddc->coarse_ksp) { 8181 char prefix[256],str_level[16]; 8182 size_t len; 8183 8184 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 8185 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8186 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 8187 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 8188 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8189 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 8190 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 8191 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8192 /* TODO is this logic correct? should check for coarse_mat type */ 8193 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8194 /* prefix */ 8195 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 8196 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 8197 if (!pcbddc->current_level) { 8198 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));CHKERRQ(ierr); 8199 ierr = PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));CHKERRQ(ierr); 8200 } else { 8201 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 8202 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8203 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8204 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8205 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 8206 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 8207 ierr = PetscStrlcat(prefix,str_level,sizeof(prefix));CHKERRQ(ierr); 8208 } 8209 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 8210 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8211 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8212 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8213 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8214 /* allow user customization */ 8215 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 8216 } 8217 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8218 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8219 if (nisdofs) { 8220 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 8221 for (i=0;i<nisdofs;i++) { 8222 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8223 } 8224 } 8225 if (nisneu) { 8226 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 8227 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 8228 } 8229 if (nisvert) { 8230 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 8231 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 8232 } 8233 if (coarseG) { 8234 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 8235 } 8236 8237 /* get some info after set from options */ 8238 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8239 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8240 if (isbddc && !multilevel_allowed) { 8241 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8242 isbddc = PETSC_FALSE; 8243 } 8244 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 8245 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 8246 if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) { 8247 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8248 isbddc = PETSC_TRUE; 8249 } 8250 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 8251 if (isredundant) { 8252 KSP inner_ksp; 8253 PC inner_pc; 8254 8255 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 8256 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 8257 } 8258 8259 /* parameters which miss an API */ 8260 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8261 if (isbddc) { 8262 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8263 8264 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8265 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8266 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8267 if (pcbddc_coarse->benign_saddle_point) { 8268 Mat coarsedivudotp_is; 8269 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8270 IS row,col; 8271 const PetscInt *gidxs; 8272 PetscInt n,st,M,N; 8273 8274 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 8275 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 8276 st = st-n; 8277 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 8278 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8279 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8280 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8281 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8282 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8283 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8284 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8285 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8286 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8287 ierr = ISDestroy(&row);CHKERRQ(ierr); 8288 ierr = ISDestroy(&col);CHKERRQ(ierr); 8289 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8290 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8291 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8292 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8293 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8294 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8295 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8296 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8297 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8298 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8299 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8300 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8301 } 8302 } 8303 8304 /* propagate symmetry info of coarse matrix */ 8305 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8306 if (pc->pmat->symmetric_set) { 8307 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8308 } 8309 if (pc->pmat->hermitian_set) { 8310 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8311 } 8312 if (pc->pmat->spd_set) { 8313 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8314 } 8315 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8316 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8317 } 8318 /* set operators */ 8319 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8320 if (pcbddc->dbg_flag) { 8321 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8322 } 8323 } 8324 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8325 ierr = PetscFree(isarray);CHKERRQ(ierr); 8326 #if 0 8327 { 8328 PetscViewer viewer; 8329 char filename[256]; 8330 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8331 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8332 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8333 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8334 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8335 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8336 } 8337 #endif 8338 8339 if (pcbddc->coarse_ksp) { 8340 Vec crhs,csol; 8341 8342 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8343 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8344 if (!csol) { 8345 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8346 } 8347 if (!crhs) { 8348 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8349 } 8350 } 8351 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8352 8353 /* compute null space for coarse solver if the benign trick has been requested */ 8354 if (pcbddc->benign_null) { 8355 8356 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8357 for (i=0;i<pcbddc->benign_n;i++) { 8358 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8359 } 8360 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8361 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8362 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8363 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8364 if (coarse_mat) { 8365 Vec nullv; 8366 PetscScalar *array,*array2; 8367 PetscInt nl; 8368 8369 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8370 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8371 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8372 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8373 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 8374 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8375 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8376 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8377 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8378 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8379 } 8380 } 8381 8382 if (pcbddc->coarse_ksp) { 8383 PetscBool ispreonly; 8384 8385 if (CoarseNullSpace) { 8386 PetscBool isnull; 8387 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8388 if (isnull) { 8389 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8390 } 8391 /* TODO: add local nullspaces (if any) */ 8392 } 8393 /* setup coarse ksp */ 8394 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8395 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8396 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8397 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8398 KSP check_ksp; 8399 KSPType check_ksp_type; 8400 PC check_pc; 8401 Vec check_vec,coarse_vec; 8402 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8403 PetscInt its; 8404 PetscBool compute_eigs; 8405 PetscReal *eigs_r,*eigs_c; 8406 PetscInt neigs; 8407 const char *prefix; 8408 8409 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8410 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8411 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8412 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8413 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8414 /* prevent from setup unneeded object */ 8415 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8416 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8417 if (ispreonly) { 8418 check_ksp_type = KSPPREONLY; 8419 compute_eigs = PETSC_FALSE; 8420 } else { 8421 check_ksp_type = KSPGMRES; 8422 compute_eigs = PETSC_TRUE; 8423 } 8424 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8425 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8426 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8427 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8428 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8429 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8430 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8431 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8432 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8433 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8434 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8435 /* create random vec */ 8436 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8437 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8438 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8439 /* solve coarse problem */ 8440 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8441 /* set eigenvalue estimation if preonly has not been requested */ 8442 if (compute_eigs) { 8443 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8444 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8445 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8446 if (neigs) { 8447 lambda_max = eigs_r[neigs-1]; 8448 lambda_min = eigs_r[0]; 8449 if (pcbddc->use_coarse_estimates) { 8450 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8451 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8452 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8453 } 8454 } 8455 } 8456 } 8457 8458 /* check coarse problem residual error */ 8459 if (pcbddc->dbg_flag) { 8460 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8461 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8462 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8463 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8464 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8465 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8466 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8467 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8468 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8469 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8470 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8471 if (CoarseNullSpace) { 8472 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8473 } 8474 if (compute_eigs) { 8475 PetscReal lambda_max_s,lambda_min_s; 8476 KSPConvergedReason reason; 8477 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8478 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8479 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8480 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8481 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8482 for (i=0;i<neigs;i++) { 8483 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8484 } 8485 } 8486 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8487 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8488 } 8489 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8490 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8491 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8492 if (compute_eigs) { 8493 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8494 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8495 } 8496 } 8497 } 8498 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8499 /* print additional info */ 8500 if (pcbddc->dbg_flag) { 8501 /* waits until all processes reaches this point */ 8502 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8503 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 8504 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8505 } 8506 8507 /* free memory */ 8508 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8509 PetscFunctionReturn(0); 8510 } 8511 8512 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8513 { 8514 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8515 PC_IS* pcis = (PC_IS*)pc->data; 8516 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8517 IS subset,subset_mult,subset_n; 8518 PetscInt local_size,coarse_size=0; 8519 PetscInt *local_primal_indices=NULL; 8520 const PetscInt *t_local_primal_indices; 8521 PetscErrorCode ierr; 8522 8523 PetscFunctionBegin; 8524 /* Compute global number of coarse dofs */ 8525 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8526 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8527 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8528 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8529 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8530 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8531 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8532 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8533 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8534 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8535 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8536 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8537 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8538 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8539 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8540 8541 /* check numbering */ 8542 if (pcbddc->dbg_flag) { 8543 PetscScalar coarsesum,*array,*array2; 8544 PetscInt i; 8545 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8546 8547 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8548 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8549 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8550 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8551 /* counter */ 8552 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8553 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8554 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8555 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8556 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8557 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8558 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8559 for (i=0;i<pcbddc->local_primal_size;i++) { 8560 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8561 } 8562 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8563 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8564 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8565 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8566 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8567 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8568 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8569 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8570 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8571 for (i=0;i<pcis->n;i++) { 8572 if (array[i] != 0.0 && array[i] != array2[i]) { 8573 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8574 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8575 set_error = PETSC_TRUE; 8576 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8577 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8578 } 8579 } 8580 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8581 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8582 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8583 for (i=0;i<pcis->n;i++) { 8584 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8585 } 8586 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8587 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8588 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8589 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8590 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8591 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8592 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8593 PetscInt *gidxs; 8594 8595 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8596 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8597 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8598 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8599 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8600 for (i=0;i<pcbddc->local_primal_size;i++) { 8601 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8602 } 8603 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8604 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8605 } 8606 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8607 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8608 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8609 } 8610 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8611 /* get back data */ 8612 *coarse_size_n = coarse_size; 8613 *local_primal_indices_n = local_primal_indices; 8614 PetscFunctionReturn(0); 8615 } 8616 8617 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8618 { 8619 IS localis_t; 8620 PetscInt i,lsize,*idxs,n; 8621 PetscScalar *vals; 8622 PetscErrorCode ierr; 8623 8624 PetscFunctionBegin; 8625 /* get indices in local ordering exploiting local to global map */ 8626 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8627 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8628 for (i=0;i<lsize;i++) vals[i] = 1.0; 8629 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8630 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8631 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8632 if (idxs) { /* multilevel guard */ 8633 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 8634 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8635 } 8636 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8637 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8638 ierr = PetscFree(vals);CHKERRQ(ierr); 8639 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8640 /* now compute set in local ordering */ 8641 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8642 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8643 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8644 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8645 for (i=0,lsize=0;i<n;i++) { 8646 if (PetscRealPart(vals[i]) > 0.5) { 8647 lsize++; 8648 } 8649 } 8650 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8651 for (i=0,lsize=0;i<n;i++) { 8652 if (PetscRealPart(vals[i]) > 0.5) { 8653 idxs[lsize++] = i; 8654 } 8655 } 8656 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8657 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8658 *localis = localis_t; 8659 PetscFunctionReturn(0); 8660 } 8661 8662 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8663 { 8664 PC_IS *pcis=(PC_IS*)pc->data; 8665 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8666 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8667 Mat S_j; 8668 PetscInt *used_xadj,*used_adjncy; 8669 PetscBool free_used_adj; 8670 PetscErrorCode ierr; 8671 8672 PetscFunctionBegin; 8673 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8674 free_used_adj = PETSC_FALSE; 8675 if (pcbddc->sub_schurs_layers == -1) { 8676 used_xadj = NULL; 8677 used_adjncy = NULL; 8678 } else { 8679 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8680 used_xadj = pcbddc->mat_graph->xadj; 8681 used_adjncy = pcbddc->mat_graph->adjncy; 8682 } else if (pcbddc->computed_rowadj) { 8683 used_xadj = pcbddc->mat_graph->xadj; 8684 used_adjncy = pcbddc->mat_graph->adjncy; 8685 } else { 8686 PetscBool flg_row=PETSC_FALSE; 8687 const PetscInt *xadj,*adjncy; 8688 PetscInt nvtxs; 8689 8690 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8691 if (flg_row) { 8692 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8693 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8694 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8695 free_used_adj = PETSC_TRUE; 8696 } else { 8697 pcbddc->sub_schurs_layers = -1; 8698 used_xadj = NULL; 8699 used_adjncy = NULL; 8700 } 8701 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8702 } 8703 } 8704 8705 /* setup sub_schurs data */ 8706 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8707 if (!sub_schurs->schur_explicit) { 8708 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8709 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8710 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8711 } else { 8712 Mat change = NULL; 8713 Vec scaling = NULL; 8714 IS change_primal = NULL, iP; 8715 PetscInt benign_n; 8716 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8717 PetscBool isseqaij,need_change = PETSC_FALSE; 8718 PetscBool discrete_harmonic = PETSC_FALSE; 8719 8720 if (!pcbddc->use_vertices && reuse_solvers) { 8721 PetscInt n_vertices; 8722 8723 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8724 reuse_solvers = (PetscBool)!n_vertices; 8725 } 8726 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8727 if (!isseqaij) { 8728 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8729 if (matis->A == pcbddc->local_mat) { 8730 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8731 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8732 } else { 8733 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8734 } 8735 } 8736 if (!pcbddc->benign_change_explicit) { 8737 benign_n = pcbddc->benign_n; 8738 } else { 8739 benign_n = 0; 8740 } 8741 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8742 We need a global reduction to avoid possible deadlocks. 8743 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8744 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8745 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8746 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8747 need_change = (PetscBool)(!need_change); 8748 } 8749 /* If the user defines additional constraints, we import them here. 8750 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8751 if (need_change) { 8752 PC_IS *pcisf; 8753 PC_BDDC *pcbddcf; 8754 PC pcf; 8755 8756 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8757 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8758 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8759 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8760 8761 /* hacks */ 8762 pcisf = (PC_IS*)pcf->data; 8763 pcisf->is_B_local = pcis->is_B_local; 8764 pcisf->vec1_N = pcis->vec1_N; 8765 pcisf->BtoNmap = pcis->BtoNmap; 8766 pcisf->n = pcis->n; 8767 pcisf->n_B = pcis->n_B; 8768 pcbddcf = (PC_BDDC*)pcf->data; 8769 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8770 pcbddcf->mat_graph = pcbddc->mat_graph; 8771 pcbddcf->use_faces = PETSC_TRUE; 8772 pcbddcf->use_change_of_basis = PETSC_TRUE; 8773 pcbddcf->use_change_on_faces = PETSC_TRUE; 8774 pcbddcf->use_qr_single = PETSC_TRUE; 8775 pcbddcf->fake_change = PETSC_TRUE; 8776 8777 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8778 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8779 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8780 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8781 change = pcbddcf->ConstraintMatrix; 8782 pcbddcf->ConstraintMatrix = NULL; 8783 8784 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8785 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8786 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8787 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8788 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8789 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8790 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8791 pcf->ops->destroy = NULL; 8792 pcf->ops->reset = NULL; 8793 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8794 } 8795 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8796 8797 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8798 if (iP) { 8799 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8800 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8801 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8802 } 8803 if (discrete_harmonic) { 8804 Mat A; 8805 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8806 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8807 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8808 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8809 ierr = MatDestroy(&A);CHKERRQ(ierr); 8810 } else { 8811 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8812 } 8813 ierr = MatDestroy(&change);CHKERRQ(ierr); 8814 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8815 } 8816 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8817 8818 /* free adjacency */ 8819 if (free_used_adj) { 8820 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8821 } 8822 PetscFunctionReturn(0); 8823 } 8824 8825 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8826 { 8827 PC_IS *pcis=(PC_IS*)pc->data; 8828 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8829 PCBDDCGraph graph; 8830 PetscErrorCode ierr; 8831 8832 PetscFunctionBegin; 8833 /* attach interface graph for determining subsets */ 8834 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8835 IS verticesIS,verticescomm; 8836 PetscInt vsize,*idxs; 8837 8838 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8839 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8840 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8841 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8842 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8843 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8844 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8845 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8846 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8847 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8848 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8849 } else { 8850 graph = pcbddc->mat_graph; 8851 } 8852 /* print some info */ 8853 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8854 IS vertices; 8855 PetscInt nv,nedges,nfaces; 8856 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8857 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8858 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8859 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8860 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8861 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8862 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8863 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8864 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8865 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8866 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8867 } 8868 8869 /* sub_schurs init */ 8870 if (!pcbddc->sub_schurs) { 8871 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8872 } 8873 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8874 8875 /* free graph struct */ 8876 if (pcbddc->sub_schurs_rebuild) { 8877 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8878 } 8879 PetscFunctionReturn(0); 8880 } 8881 8882 PetscErrorCode PCBDDCCheckOperator(PC pc) 8883 { 8884 PC_IS *pcis=(PC_IS*)pc->data; 8885 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8886 PetscErrorCode ierr; 8887 8888 PetscFunctionBegin; 8889 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8890 IS zerodiag = NULL; 8891 Mat S_j,B0_B=NULL; 8892 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8893 PetscScalar *p0_check,*array,*array2; 8894 PetscReal norm; 8895 PetscInt i; 8896 8897 /* B0 and B0_B */ 8898 if (zerodiag) { 8899 IS dummy; 8900 8901 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8902 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8903 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8904 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8905 } 8906 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8907 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8908 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8909 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8910 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8911 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8912 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8913 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8914 /* S_j */ 8915 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8916 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8917 8918 /* mimic vector in \widetilde{W}_\Gamma */ 8919 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8920 /* continuous in primal space */ 8921 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8922 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8923 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8924 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8925 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8926 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8927 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8928 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8929 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8930 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8931 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8932 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8933 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8934 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8935 8936 /* assemble rhs for coarse problem */ 8937 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8938 /* local with Schur */ 8939 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8940 if (zerodiag) { 8941 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8942 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8943 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8944 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8945 } 8946 /* sum on primal nodes the local contributions */ 8947 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8948 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8949 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8950 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8951 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8952 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8953 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8954 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8955 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8956 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8957 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8958 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8959 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8960 /* scale primal nodes (BDDC sums contibutions) */ 8961 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8962 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8963 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8964 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8965 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8966 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8967 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8968 /* global: \widetilde{B0}_B w_\Gamma */ 8969 if (zerodiag) { 8970 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8971 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8972 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8973 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8974 } 8975 /* BDDC */ 8976 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8977 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8978 8979 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8980 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8981 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8982 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8983 for (i=0;i<pcbddc->benign_n;i++) { 8984 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8985 } 8986 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8987 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8988 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8989 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8990 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8991 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8992 } 8993 PetscFunctionReturn(0); 8994 } 8995 8996 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8997 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8998 { 8999 Mat At; 9000 IS rows; 9001 PetscInt rst,ren; 9002 PetscErrorCode ierr; 9003 PetscLayout rmap; 9004 9005 PetscFunctionBegin; 9006 rst = ren = 0; 9007 if (ccomm != MPI_COMM_NULL) { 9008 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 9009 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 9010 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 9011 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 9012 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 9013 } 9014 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 9015 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 9016 ierr = ISDestroy(&rows);CHKERRQ(ierr); 9017 9018 if (ccomm != MPI_COMM_NULL) { 9019 Mat_MPIAIJ *a,*b; 9020 IS from,to; 9021 Vec gvec; 9022 PetscInt lsize; 9023 9024 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 9025 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 9026 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 9027 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 9028 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 9029 a = (Mat_MPIAIJ*)At->data; 9030 b = (Mat_MPIAIJ*)(*B)->data; 9031 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 9032 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 9033 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 9034 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 9035 b->A = a->A; 9036 b->B = a->B; 9037 9038 b->donotstash = a->donotstash; 9039 b->roworiented = a->roworiented; 9040 b->rowindices = 0; 9041 b->rowvalues = 0; 9042 b->getrowactive = PETSC_FALSE; 9043 9044 (*B)->rmap = rmap; 9045 (*B)->factortype = A->factortype; 9046 (*B)->assembled = PETSC_TRUE; 9047 (*B)->insertmode = NOT_SET_VALUES; 9048 (*B)->preallocated = PETSC_TRUE; 9049 9050 if (a->colmap) { 9051 #if defined(PETSC_USE_CTABLE) 9052 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 9053 #else 9054 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 9055 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9056 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9057 #endif 9058 } else b->colmap = 0; 9059 if (a->garray) { 9060 PetscInt len; 9061 len = a->B->cmap->n; 9062 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 9063 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 9064 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 9065 } else b->garray = 0; 9066 9067 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 9068 b->lvec = a->lvec; 9069 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 9070 9071 /* cannot use VecScatterCopy */ 9072 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 9073 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 9074 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 9075 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 9076 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 9077 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 9078 ierr = ISDestroy(&from);CHKERRQ(ierr); 9079 ierr = ISDestroy(&to);CHKERRQ(ierr); 9080 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 9081 } 9082 ierr = MatDestroy(&At);CHKERRQ(ierr); 9083 PetscFunctionReturn(0); 9084 } 9085