1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 #if !defined(PETSC_USE_COMPLEX) 18 PetscScalar *uwork,*data,*U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM,bN,lwork,lierr,di = 1; 21 PetscInt ulw,i,nr,nc,n; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 26 if (!nr || !nc) PetscFunctionReturn(0); 27 28 /* workspace */ 29 if (!work) { 30 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 31 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 32 } else { 33 ulw = lw; 34 uwork = work; 35 } 36 n = PetscMin(nr,nc); 37 if (!rwork) { 38 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 39 } else { 40 sing = rwork; 41 } 42 43 /* SVD */ 44 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 48 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 49 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 50 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 51 ierr = PetscFPTrapPop();CHKERRQ(ierr); 52 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 53 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 54 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 55 if (!rwork) { 56 ierr = PetscFree(sing);CHKERRQ(ierr); 57 } 58 if (!work) { 59 ierr = PetscFree(uwork);CHKERRQ(ierr); 60 } 61 /* create B */ 62 if (!range) { 63 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 64 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 65 ierr = PetscArraycpy(data,U+nr*i,(nr-i)*nr);CHKERRQ(ierr); 66 } else { 67 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 68 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 69 ierr = PetscArraycpy(data,U,i*nr);CHKERRQ(ierr); 70 } 71 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscFree(U);CHKERRQ(ierr); 73 #else /* PETSC_USE_COMPLEX */ 74 PetscFunctionBegin; 75 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 76 #endif 77 PetscFunctionReturn(0); 78 } 79 80 /* TODO REMOVE */ 81 #if defined(PRINT_GDET) 82 static int inc = 0; 83 static int lev = 0; 84 #endif 85 86 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 87 { 88 PetscErrorCode ierr; 89 Mat GE,GEd; 90 PetscInt rsize,csize,esize; 91 PetscScalar *ptr; 92 93 PetscFunctionBegin; 94 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 95 if (!esize) PetscFunctionReturn(0); 96 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 97 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 98 99 /* gradients */ 100 ptr = work + 5*esize; 101 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 102 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 103 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 104 ierr = MatDestroy(&GE);CHKERRQ(ierr); 105 106 /* constants */ 107 ptr += rsize*csize; 108 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 109 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 110 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 111 ierr = MatDestroy(&GE);CHKERRQ(ierr); 112 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 113 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 114 115 if (corners) { 116 Mat GEc; 117 const PetscScalar *vals; 118 PetscScalar v; 119 120 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 121 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 122 ierr = MatDenseGetArrayRead(GEd,&vals);CHKERRQ(ierr); 123 /* v = PetscAbsScalar(vals[0]) */; 124 v = 1.; 125 cvals[0] = vals[0]/v; 126 cvals[1] = vals[1]/v; 127 ierr = MatDenseRestoreArrayRead(GEd,&vals);CHKERRQ(ierr); 128 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 129 #if defined(PRINT_GDET) 130 { 131 PetscViewer viewer; 132 char filename[256]; 133 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 134 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 135 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 136 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 137 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 138 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 139 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 140 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 141 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 142 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 143 } 144 #endif 145 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 146 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 147 } 148 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode PCBDDCNedelecSupport(PC pc) 153 { 154 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 155 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 156 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 157 Vec tvec; 158 PetscSF sfv; 159 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 160 MPI_Comm comm; 161 IS lned,primals,allprimals,nedfieldlocal; 162 IS *eedges,*extrows,*extcols,*alleedges; 163 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 164 PetscScalar *vals,*work; 165 PetscReal *rwork; 166 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 167 PetscInt ne,nv,Lv,order,n,field; 168 PetscInt n_neigh,*neigh,*n_shared,**shared; 169 PetscInt i,j,extmem,cum,maxsize,nee; 170 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 171 PetscInt *sfvleaves,*sfvroots; 172 PetscInt *corners,*cedges; 173 PetscInt *ecount,**eneighs,*vcount,**vneighs; 174 #if defined(PETSC_USE_DEBUG) 175 PetscInt *emarks; 176 #endif 177 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 178 PetscErrorCode ierr; 179 180 PetscFunctionBegin; 181 /* If the discrete gradient is defined for a subset of dofs and global is true, 182 it assumes G is given in global ordering for all the dofs. 183 Otherwise, the ordering is global for the Nedelec field */ 184 order = pcbddc->nedorder; 185 conforming = pcbddc->conforming; 186 field = pcbddc->nedfield; 187 global = pcbddc->nedglobal; 188 setprimal = PETSC_FALSE; 189 print = PETSC_FALSE; 190 singular = PETSC_FALSE; 191 192 /* Command line customization */ 193 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 194 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 195 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 196 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 197 /* print debug info TODO: to be removed */ 198 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 199 ierr = PetscOptionsEnd();CHKERRQ(ierr); 200 201 /* Return if there are no edges in the decomposition and the problem is not singular */ 202 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 203 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 204 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 205 if (!singular) { 206 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 207 lrc[0] = PETSC_FALSE; 208 for (i=0;i<n;i++) { 209 if (PetscRealPart(vals[i]) > 2.) { 210 lrc[0] = PETSC_TRUE; 211 break; 212 } 213 } 214 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 215 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 216 if (!lrc[1]) PetscFunctionReturn(0); 217 } 218 219 /* Get Nedelec field */ 220 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal); 221 if (pcbddc->n_ISForDofsLocal && field >= 0) { 222 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 223 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 224 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 225 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 226 ne = n; 227 nedfieldlocal = NULL; 228 global = PETSC_TRUE; 229 } else if (field == PETSC_DECIDE) { 230 PetscInt rst,ren,*idx; 231 232 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 233 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 234 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 235 for (i=rst;i<ren;i++) { 236 PetscInt nc; 237 238 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 239 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 240 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 241 } 242 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 243 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 244 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 245 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 246 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 247 } else { 248 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 249 } 250 251 /* Sanity checks */ 252 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 253 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 254 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order); 255 256 /* Just set primal dofs and return */ 257 if (setprimal) { 258 IS enedfieldlocal; 259 PetscInt *eidxs; 260 261 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 262 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 263 if (nedfieldlocal) { 264 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 265 for (i=0,cum=0;i<ne;i++) { 266 if (PetscRealPart(vals[idxs[i]]) > 2.) { 267 eidxs[cum++] = idxs[i]; 268 } 269 } 270 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 271 } else { 272 for (i=0,cum=0;i<ne;i++) { 273 if (PetscRealPart(vals[i]) > 2.) { 274 eidxs[cum++] = i; 275 } 276 } 277 } 278 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 279 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 280 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 281 ierr = PetscFree(eidxs);CHKERRQ(ierr); 282 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 283 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 /* Compute some l2g maps */ 288 if (nedfieldlocal) { 289 IS is; 290 291 /* need to map from the local Nedelec field to local numbering */ 292 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 293 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 294 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 295 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 296 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 297 if (global) { 298 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 299 el2g = al2g; 300 } else { 301 IS gis; 302 303 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 304 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 305 ierr = ISDestroy(&gis);CHKERRQ(ierr); 306 } 307 ierr = ISDestroy(&is);CHKERRQ(ierr); 308 } else { 309 /* restore default */ 310 pcbddc->nedfield = -1; 311 /* one ref for the destruction of al2g, one for el2g */ 312 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 313 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 314 el2g = al2g; 315 fl2g = NULL; 316 } 317 318 /* Start communication to drop connections for interior edges (for cc analysis only) */ 319 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 320 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 321 if (nedfieldlocal) { 322 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 323 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 324 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 325 } else { 326 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 327 } 328 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 329 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 330 331 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 332 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 333 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 334 if (global) { 335 PetscInt rst; 336 337 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 338 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 339 if (matis->sf_rootdata[i] < 2) { 340 matis->sf_rootdata[cum++] = i + rst; 341 } 342 } 343 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 344 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 345 } else { 346 PetscInt *tbz; 347 348 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 349 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 350 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 351 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 352 for (i=0,cum=0;i<ne;i++) 353 if (matis->sf_leafdata[idxs[i]] == 1) 354 tbz[cum++] = i; 355 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 356 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 357 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 358 ierr = PetscFree(tbz);CHKERRQ(ierr); 359 } 360 } else { /* we need the entire G to infer the nullspace */ 361 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 362 G = pcbddc->discretegradient; 363 } 364 365 /* Extract subdomain relevant rows of G */ 366 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 367 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 368 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 369 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 370 ierr = ISDestroy(&lned);CHKERRQ(ierr); 371 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 372 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 373 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 374 375 /* SF for nodal dofs communications */ 376 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 377 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 378 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 379 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 380 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 381 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 382 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 384 i = singular ? 2 : 1; 385 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 386 387 /* Destroy temporary G created in MATIS format and modified G */ 388 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 389 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 390 ierr = MatDestroy(&G);CHKERRQ(ierr); 391 392 if (print) { 393 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 394 ierr = MatView(lG,NULL);CHKERRQ(ierr); 395 } 396 397 /* Save lG for values insertion in change of basis */ 398 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 399 400 /* Analyze the edge-nodes connections (duplicate lG) */ 401 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 402 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 403 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 404 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 405 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 406 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 407 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 408 /* need to import the boundary specification to ensure the 409 proper detection of coarse edges' endpoints */ 410 if (pcbddc->DirichletBoundariesLocal) { 411 IS is; 412 413 if (fl2g) { 414 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 415 } else { 416 is = pcbddc->DirichletBoundariesLocal; 417 } 418 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 419 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 420 for (i=0;i<cum;i++) { 421 if (idxs[i] >= 0) { 422 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 423 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 424 } 425 } 426 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 427 if (fl2g) { 428 ierr = ISDestroy(&is);CHKERRQ(ierr); 429 } 430 } 431 if (pcbddc->NeumannBoundariesLocal) { 432 IS is; 433 434 if (fl2g) { 435 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 436 } else { 437 is = pcbddc->NeumannBoundariesLocal; 438 } 439 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 440 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 441 for (i=0;i<cum;i++) { 442 if (idxs[i] >= 0) { 443 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 444 } 445 } 446 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 447 if (fl2g) { 448 ierr = ISDestroy(&is);CHKERRQ(ierr); 449 } 450 } 451 452 /* Count neighs per dof */ 453 ierr = ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 454 ierr = ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 455 456 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 457 for proper detection of coarse edges' endpoints */ 458 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 459 for (i=0;i<ne;i++) { 460 if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) { 461 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 462 } 463 } 464 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 465 if (!conforming) { 466 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 467 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 468 } 469 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 470 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 471 cum = 0; 472 for (i=0;i<ne;i++) { 473 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 474 if (!PetscBTLookup(btee,i)) { 475 marks[cum++] = i; 476 continue; 477 } 478 /* set badly connected edge dofs as primal */ 479 if (!conforming) { 480 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 481 marks[cum++] = i; 482 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 483 for (j=ii[i];j<ii[i+1];j++) { 484 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 485 } 486 } else { 487 /* every edge dofs should be connected trough a certain number of nodal dofs 488 to other edge dofs belonging to coarse edges 489 - at most 2 endpoints 490 - order-1 interior nodal dofs 491 - no undefined nodal dofs (nconn < order) 492 */ 493 PetscInt ends = 0,ints = 0, undef = 0; 494 for (j=ii[i];j<ii[i+1];j++) { 495 PetscInt v = jj[j],k; 496 PetscInt nconn = iit[v+1]-iit[v]; 497 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 498 if (nconn > order) ends++; 499 else if (nconn == order) ints++; 500 else undef++; 501 } 502 if (undef || ends > 2 || ints != order -1) { 503 marks[cum++] = i; 504 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 505 for (j=ii[i];j<ii[i+1];j++) { 506 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 507 } 508 } 509 } 510 } 511 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 512 if (!order && ii[i+1] != ii[i]) { 513 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 514 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 515 } 516 } 517 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 518 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 519 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 520 if (!conforming) { 521 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 522 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 523 } 524 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 525 526 /* identify splitpoints and corner candidates */ 527 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 528 if (print) { 529 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 530 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 531 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 532 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 533 } 534 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 535 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 536 for (i=0;i<nv;i++) { 537 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 538 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 539 if (!order) { /* variable order */ 540 PetscReal vorder = 0.; 541 542 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 543 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 544 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test); 545 ord = 1; 546 } 547 #if defined(PETSC_USE_DEBUG) 548 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord); 549 #endif 550 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 551 if (PetscBTLookup(btbd,jj[j])) { 552 bdir = PETSC_TRUE; 553 break; 554 } 555 if (vc != ecount[jj[j]]) { 556 sneighs = PETSC_FALSE; 557 } else { 558 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 559 for (k=0;k<vc;k++) { 560 if (vn[k] != en[k]) { 561 sneighs = PETSC_FALSE; 562 break; 563 } 564 } 565 } 566 } 567 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 568 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir); 569 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 570 } else if (test == ord) { 571 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 572 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i); 573 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 574 } else { 575 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i); 576 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 577 } 578 } 579 } 580 ierr = ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 581 ierr = ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 582 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 583 584 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 585 if (order != 1) { 586 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 587 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 588 for (i=0;i<nv;i++) { 589 if (PetscBTLookup(btvcand,i)) { 590 PetscBool found = PETSC_FALSE; 591 for (j=ii[i];j<ii[i+1] && !found;j++) { 592 PetscInt k,e = jj[j]; 593 if (PetscBTLookup(bte,e)) continue; 594 for (k=iit[e];k<iit[e+1];k++) { 595 PetscInt v = jjt[k]; 596 if (v != i && PetscBTLookup(btvcand,v)) { 597 found = PETSC_TRUE; 598 break; 599 } 600 } 601 } 602 if (!found) { 603 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i); 604 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 605 } else { 606 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i); 607 } 608 } 609 } 610 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 611 } 612 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 613 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 614 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 615 616 /* Get the local G^T explicitly */ 617 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 618 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 619 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 620 621 /* Mark interior nodal dofs */ 622 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 623 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 624 for (i=1;i<n_neigh;i++) { 625 for (j=0;j<n_shared[i];j++) { 626 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 627 } 628 } 629 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 630 631 /* communicate corners and splitpoints */ 632 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 633 ierr = PetscArrayzero(sfvleaves,nv);CHKERRQ(ierr); 634 ierr = PetscArrayzero(sfvroots,Lv);CHKERRQ(ierr); 635 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 636 637 if (print) { 638 IS tbz; 639 640 cum = 0; 641 for (i=0;i<nv;i++) 642 if (sfvleaves[i]) 643 vmarks[cum++] = i; 644 645 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 646 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 647 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 648 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 649 } 650 651 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 652 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 653 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 654 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 655 656 /* Zero rows of lGt corresponding to identified corners 657 and interior nodal dofs */ 658 cum = 0; 659 for (i=0;i<nv;i++) { 660 if (sfvleaves[i]) { 661 vmarks[cum++] = i; 662 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 663 } 664 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 665 } 666 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 667 if (print) { 668 IS tbz; 669 670 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 671 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 672 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 673 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 674 } 675 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 676 ierr = PetscFree(vmarks);CHKERRQ(ierr); 677 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 678 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 679 680 /* Recompute G */ 681 ierr = MatDestroy(&lG);CHKERRQ(ierr); 682 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 683 if (print) { 684 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 685 ierr = MatView(lG,NULL);CHKERRQ(ierr); 686 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 687 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 688 } 689 690 /* Get primal dofs (if any) */ 691 cum = 0; 692 for (i=0;i<ne;i++) { 693 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 694 } 695 if (fl2g) { 696 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 697 } 698 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 699 if (print) { 700 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 701 ierr = ISView(primals,NULL);CHKERRQ(ierr); 702 } 703 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 704 /* TODO: what if the user passed in some of them ? */ 705 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 706 ierr = ISDestroy(&primals);CHKERRQ(ierr); 707 708 /* Compute edge connectivity */ 709 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 710 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 711 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 712 if (fl2g) { 713 PetscBT btf; 714 PetscInt *iia,*jja,*iiu,*jju; 715 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 716 717 /* create CSR for all local dofs */ 718 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 719 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 720 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n); 721 iiu = pcbddc->mat_graph->xadj; 722 jju = pcbddc->mat_graph->adjncy; 723 } else if (pcbddc->use_local_adj) { 724 rest = PETSC_TRUE; 725 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 726 } else { 727 free = PETSC_TRUE; 728 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 729 iiu[0] = 0; 730 for (i=0;i<n;i++) { 731 iiu[i+1] = i+1; 732 jju[i] = -1; 733 } 734 } 735 736 /* import sizes of CSR */ 737 iia[0] = 0; 738 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 739 740 /* overwrite entries corresponding to the Nedelec field */ 741 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 742 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 743 for (i=0;i<ne;i++) { 744 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 745 iia[idxs[i]+1] = ii[i+1]-ii[i]; 746 } 747 748 /* iia in CSR */ 749 for (i=0;i<n;i++) iia[i+1] += iia[i]; 750 751 /* jja in CSR */ 752 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 753 for (i=0;i<n;i++) 754 if (!PetscBTLookup(btf,i)) 755 for (j=0;j<iiu[i+1]-iiu[i];j++) 756 jja[iia[i]+j] = jju[iiu[i]+j]; 757 758 /* map edge dofs connectivity */ 759 if (jj) { 760 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 761 for (i=0;i<ne;i++) { 762 PetscInt e = idxs[i]; 763 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 764 } 765 } 766 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 767 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 768 if (rest) { 769 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 770 } 771 if (free) { 772 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 773 } 774 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 775 } else { 776 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 777 } 778 779 /* Analyze interface for edge dofs */ 780 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 781 pcbddc->mat_graph->twodim = PETSC_FALSE; 782 783 /* Get coarse edges in the edge space */ 784 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 785 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 786 787 if (fl2g) { 788 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 789 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 790 for (i=0;i<nee;i++) { 791 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 792 } 793 } else { 794 eedges = alleedges; 795 primals = allprimals; 796 } 797 798 /* Mark fine edge dofs with their coarse edge id */ 799 ierr = PetscArrayzero(marks,ne);CHKERRQ(ierr); 800 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 801 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 802 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 803 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 804 if (print) { 805 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 806 ierr = ISView(primals,NULL);CHKERRQ(ierr); 807 } 808 809 maxsize = 0; 810 for (i=0;i<nee;i++) { 811 PetscInt size,mark = i+1; 812 813 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 814 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 815 for (j=0;j<size;j++) marks[idxs[j]] = mark; 816 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 817 maxsize = PetscMax(maxsize,size); 818 } 819 820 /* Find coarse edge endpoints */ 821 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 822 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 823 for (i=0;i<nee;i++) { 824 PetscInt mark = i+1,size; 825 826 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 827 if (!size && nedfieldlocal) continue; 828 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 829 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 830 if (print) { 831 ierr = PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);CHKERRQ(ierr); 832 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 833 } 834 for (j=0;j<size;j++) { 835 PetscInt k, ee = idxs[j]; 836 if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee); 837 for (k=ii[ee];k<ii[ee+1];k++) { 838 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]); 839 if (PetscBTLookup(btv,jj[k])) { 840 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]); 841 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 842 PetscInt k2; 843 PetscBool corner = PETSC_FALSE; 844 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 845 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 846 /* it's a corner if either is connected with an edge dof belonging to a different cc or 847 if the edge dof lie on the natural part of the boundary */ 848 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 849 corner = PETSC_TRUE; 850 break; 851 } 852 } 853 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 854 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]); 855 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 856 } else { 857 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 858 } 859 } 860 } 861 } 862 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 863 } 864 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 865 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 866 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 867 868 /* Reset marked primal dofs */ 869 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 870 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 871 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 872 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 873 874 /* Now use the initial lG */ 875 ierr = MatDestroy(&lG);CHKERRQ(ierr); 876 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 877 lG = lGinit; 878 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 879 880 /* Compute extended cols indices */ 881 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 882 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 883 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 884 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 885 i *= maxsize; 886 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 887 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 888 eerr = PETSC_FALSE; 889 for (i=0;i<nee;i++) { 890 PetscInt size,found = 0; 891 892 cum = 0; 893 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 894 if (!size && nedfieldlocal) continue; 895 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 896 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 897 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 898 for (j=0;j<size;j++) { 899 PetscInt k,ee = idxs[j]; 900 for (k=ii[ee];k<ii[ee+1];k++) { 901 PetscInt vv = jj[k]; 902 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 903 else if (!PetscBTLookupSet(btvc,vv)) found++; 904 } 905 } 906 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 907 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 908 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 909 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 910 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 911 /* it may happen that endpoints are not defined at this point 912 if it is the case, mark this edge for a second pass */ 913 if (cum != size -1 || found != 2) { 914 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 915 if (print) { 916 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 917 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 918 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 919 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 920 } 921 eerr = PETSC_TRUE; 922 } 923 } 924 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 925 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 926 if (done) { 927 PetscInt *newprimals; 928 929 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 930 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 931 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 932 ierr = PetscArraycpy(newprimals,idxs,cum);CHKERRQ(ierr); 933 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 934 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 935 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr); 936 for (i=0;i<nee;i++) { 937 PetscBool has_candidates = PETSC_FALSE; 938 if (PetscBTLookup(bter,i)) { 939 PetscInt size,mark = i+1; 940 941 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 942 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 943 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 944 for (j=0;j<size;j++) { 945 PetscInt k,ee = idxs[j]; 946 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]); 947 for (k=ii[ee];k<ii[ee+1];k++) { 948 /* set all candidates located on the edge as corners */ 949 if (PetscBTLookup(btvcand,jj[k])) { 950 PetscInt k2,vv = jj[k]; 951 has_candidates = PETSC_TRUE; 952 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv); 953 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 954 /* set all edge dofs connected to candidate as primals */ 955 for (k2=iit[vv];k2<iit[vv+1];k2++) { 956 if (marks[jjt[k2]] == mark) { 957 PetscInt k3,ee2 = jjt[k2]; 958 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2); 959 newprimals[cum++] = ee2; 960 /* finally set the new corners */ 961 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 962 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]); 963 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 964 } 965 } 966 } 967 } else { 968 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]); 969 } 970 } 971 } 972 if (!has_candidates) { /* circular edge */ 973 PetscInt k, ee = idxs[0],*tmarks; 974 975 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 976 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i); 977 for (k=ii[ee];k<ii[ee+1];k++) { 978 PetscInt k2; 979 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]); 980 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 981 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 982 } 983 for (j=0;j<size;j++) { 984 if (tmarks[idxs[j]] > 1) { 985 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]); 986 newprimals[cum++] = idxs[j]; 987 } 988 } 989 ierr = PetscFree(tmarks);CHKERRQ(ierr); 990 } 991 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 992 } 993 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 994 } 995 ierr = PetscFree(extcols);CHKERRQ(ierr); 996 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 997 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 998 if (fl2g) { 999 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1000 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1001 for (i=0;i<nee;i++) { 1002 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1003 } 1004 ierr = PetscFree(eedges);CHKERRQ(ierr); 1005 } 1006 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1007 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1008 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1009 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1010 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1011 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1012 pcbddc->mat_graph->twodim = PETSC_FALSE; 1013 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1014 if (fl2g) { 1015 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1016 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1017 for (i=0;i<nee;i++) { 1018 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1019 } 1020 } else { 1021 eedges = alleedges; 1022 primals = allprimals; 1023 } 1024 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1025 1026 /* Mark again */ 1027 ierr = PetscArrayzero(marks,ne);CHKERRQ(ierr); 1028 for (i=0;i<nee;i++) { 1029 PetscInt size,mark = i+1; 1030 1031 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1032 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1033 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1034 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1035 } 1036 if (print) { 1037 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1038 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1039 } 1040 1041 /* Recompute extended cols */ 1042 eerr = PETSC_FALSE; 1043 for (i=0;i<nee;i++) { 1044 PetscInt size; 1045 1046 cum = 0; 1047 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1048 if (!size && nedfieldlocal) continue; 1049 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1050 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1051 for (j=0;j<size;j++) { 1052 PetscInt k,ee = idxs[j]; 1053 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1054 } 1055 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1056 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1057 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1058 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1059 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1060 if (cum != size -1) { 1061 if (print) { 1062 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1063 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1064 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1065 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1066 } 1067 eerr = PETSC_TRUE; 1068 } 1069 } 1070 } 1071 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1072 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1073 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1074 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1075 /* an error should not occur at this point */ 1076 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1077 1078 /* Check the number of endpoints */ 1079 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1080 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1081 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1082 for (i=0;i<nee;i++) { 1083 PetscInt size, found = 0, gc[2]; 1084 1085 /* init with defaults */ 1086 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1087 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1088 if (!size && nedfieldlocal) continue; 1089 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1090 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1091 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1092 for (j=0;j<size;j++) { 1093 PetscInt k,ee = idxs[j]; 1094 for (k=ii[ee];k<ii[ee+1];k++) { 1095 PetscInt vv = jj[k]; 1096 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1097 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i); 1098 corners[i*2+found++] = vv; 1099 } 1100 } 1101 } 1102 if (found != 2) { 1103 PetscInt e; 1104 if (fl2g) { 1105 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1106 } else { 1107 e = idxs[0]; 1108 } 1109 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]); 1110 } 1111 1112 /* get primal dof index on this coarse edge */ 1113 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1114 if (gc[0] > gc[1]) { 1115 PetscInt swap = corners[2*i]; 1116 corners[2*i] = corners[2*i+1]; 1117 corners[2*i+1] = swap; 1118 } 1119 cedges[i] = idxs[size-1]; 1120 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1121 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1122 } 1123 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1124 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1125 1126 #if defined(PETSC_USE_DEBUG) 1127 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1128 not interfere with neighbouring coarse edges */ 1129 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1130 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1131 for (i=0;i<nv;i++) { 1132 PetscInt emax = 0,eemax = 0; 1133 1134 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1135 ierr = PetscArrayzero(emarks,nee+1);CHKERRQ(ierr); 1136 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1137 for (j=1;j<nee+1;j++) { 1138 if (emax < emarks[j]) { 1139 emax = emarks[j]; 1140 eemax = j; 1141 } 1142 } 1143 /* not relevant for edges */ 1144 if (!eemax) continue; 1145 1146 for (j=ii[i];j<ii[i+1];j++) { 1147 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1148 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]); 1149 } 1150 } 1151 } 1152 ierr = PetscFree(emarks);CHKERRQ(ierr); 1153 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1154 #endif 1155 1156 /* Compute extended rows indices for edge blocks of the change of basis */ 1157 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1158 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1159 extmem *= maxsize; 1160 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1161 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1162 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1163 for (i=0;i<nv;i++) { 1164 PetscInt mark = 0,size,start; 1165 1166 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1167 for (j=ii[i];j<ii[i+1];j++) 1168 if (marks[jj[j]] && !mark) 1169 mark = marks[jj[j]]; 1170 1171 /* not relevant */ 1172 if (!mark) continue; 1173 1174 /* import extended row */ 1175 mark--; 1176 start = mark*extmem+extrowcum[mark]; 1177 size = ii[i+1]-ii[i]; 1178 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem); 1179 ierr = PetscArraycpy(extrow+start,jj+ii[i],size);CHKERRQ(ierr); 1180 extrowcum[mark] += size; 1181 } 1182 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1183 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1184 ierr = PetscFree(marks);CHKERRQ(ierr); 1185 1186 /* Compress extrows */ 1187 cum = 0; 1188 for (i=0;i<nee;i++) { 1189 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1190 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1191 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1192 cum = PetscMax(cum,size); 1193 } 1194 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1195 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1196 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1197 1198 /* Workspace for lapack inner calls and VecSetValues */ 1199 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1200 1201 /* Create change of basis matrix (preallocation can be improved) */ 1202 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1203 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1204 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1205 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1206 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1207 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1208 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1209 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1210 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1211 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1212 1213 /* Defaults to identity */ 1214 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1215 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1216 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1217 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1218 1219 /* Create discrete gradient for the coarser level if needed */ 1220 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1221 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1222 if (pcbddc->current_level < pcbddc->max_levels) { 1223 ISLocalToGlobalMapping cel2g,cvl2g; 1224 IS wis,gwis; 1225 PetscInt cnv,cne; 1226 1227 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1228 if (fl2g) { 1229 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1230 } else { 1231 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1232 pcbddc->nedclocal = wis; 1233 } 1234 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1235 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1236 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1237 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1238 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1239 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1240 1241 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1242 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1243 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1244 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1245 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1246 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1247 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1248 1249 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1250 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1251 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1252 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1253 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1254 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1255 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1256 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1257 } 1258 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1259 1260 #if defined(PRINT_GDET) 1261 inc = 0; 1262 lev = pcbddc->current_level; 1263 #endif 1264 1265 /* Insert values in the change of basis matrix */ 1266 for (i=0;i<nee;i++) { 1267 Mat Gins = NULL, GKins = NULL; 1268 IS cornersis = NULL; 1269 PetscScalar cvals[2]; 1270 1271 if (pcbddc->nedcG) { 1272 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1273 } 1274 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1275 if (Gins && GKins) { 1276 const PetscScalar *data; 1277 const PetscInt *rows,*cols; 1278 PetscInt nrh,nch,nrc,ncc; 1279 1280 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1281 /* H1 */ 1282 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1283 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1284 ierr = MatDenseGetArrayRead(Gins,&data);CHKERRQ(ierr); 1285 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1286 ierr = MatDenseRestoreArrayRead(Gins,&data);CHKERRQ(ierr); 1287 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1288 /* complement */ 1289 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1290 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i); 1291 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i); 1292 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc); 1293 ierr = MatDenseGetArrayRead(GKins,&data);CHKERRQ(ierr); 1294 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1295 ierr = MatDenseRestoreArrayRead(GKins,&data);CHKERRQ(ierr); 1296 1297 /* coarse discrete gradient */ 1298 if (pcbddc->nedcG) { 1299 PetscInt cols[2]; 1300 1301 cols[0] = 2*i; 1302 cols[1] = 2*i+1; 1303 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1304 } 1305 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1306 } 1307 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1308 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1309 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1310 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1311 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1312 } 1313 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1314 1315 /* Start assembling */ 1316 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1317 if (pcbddc->nedcG) { 1318 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1319 } 1320 1321 /* Free */ 1322 if (fl2g) { 1323 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1324 for (i=0;i<nee;i++) { 1325 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1326 } 1327 ierr = PetscFree(eedges);CHKERRQ(ierr); 1328 } 1329 1330 /* hack mat_graph with primal dofs on the coarse edges */ 1331 { 1332 PCBDDCGraph graph = pcbddc->mat_graph; 1333 PetscInt *oqueue = graph->queue; 1334 PetscInt *ocptr = graph->cptr; 1335 PetscInt ncc,*idxs; 1336 1337 /* find first primal edge */ 1338 if (pcbddc->nedclocal) { 1339 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1340 } else { 1341 if (fl2g) { 1342 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1343 } 1344 idxs = cedges; 1345 } 1346 cum = 0; 1347 while (cum < nee && cedges[cum] < 0) cum++; 1348 1349 /* adapt connected components */ 1350 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1351 graph->cptr[0] = 0; 1352 for (i=0,ncc=0;i<graph->ncc;i++) { 1353 PetscInt lc = ocptr[i+1]-ocptr[i]; 1354 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1355 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1356 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1357 ncc++; 1358 lc--; 1359 cum++; 1360 while (cum < nee && cedges[cum] < 0) cum++; 1361 } 1362 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1363 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1364 ncc++; 1365 } 1366 graph->ncc = ncc; 1367 if (pcbddc->nedclocal) { 1368 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1369 } 1370 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1371 } 1372 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1373 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1374 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1375 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1376 1377 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1378 ierr = PetscFree(extrow);CHKERRQ(ierr); 1379 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1380 ierr = PetscFree(corners);CHKERRQ(ierr); 1381 ierr = PetscFree(cedges);CHKERRQ(ierr); 1382 ierr = PetscFree(extrows);CHKERRQ(ierr); 1383 ierr = PetscFree(extcols);CHKERRQ(ierr); 1384 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1385 1386 /* Complete assembling */ 1387 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1388 if (pcbddc->nedcG) { 1389 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1390 #if 0 1391 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1392 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1393 #endif 1394 } 1395 1396 /* set change of basis */ 1397 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1398 ierr = MatDestroy(&T);CHKERRQ(ierr); 1399 1400 PetscFunctionReturn(0); 1401 } 1402 1403 /* the near-null space of BDDC carries information on quadrature weights, 1404 and these can be collinear -> so cheat with MatNullSpaceCreate 1405 and create a suitable set of basis vectors first */ 1406 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1407 { 1408 PetscErrorCode ierr; 1409 PetscInt i; 1410 1411 PetscFunctionBegin; 1412 for (i=0;i<nvecs;i++) { 1413 PetscInt first,last; 1414 1415 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1416 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1417 if (i>=first && i < last) { 1418 PetscScalar *data; 1419 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1420 if (!has_const) { 1421 data[i-first] = 1.; 1422 } else { 1423 data[2*i-first] = 1./PetscSqrtReal(2.); 1424 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1425 } 1426 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1427 } 1428 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1429 } 1430 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1431 for (i=0;i<nvecs;i++) { /* reset vectors */ 1432 PetscInt first,last; 1433 ierr = VecLockReadPop(quad_vecs[i]);CHKERRQ(ierr); 1434 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1435 if (i>=first && i < last) { 1436 PetscScalar *data; 1437 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1438 if (!has_const) { 1439 data[i-first] = 0.; 1440 } else { 1441 data[2*i-first] = 0.; 1442 data[2*i-first+1] = 0.; 1443 } 1444 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1445 } 1446 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1447 ierr = VecLockReadPush(quad_vecs[i]);CHKERRQ(ierr); 1448 } 1449 PetscFunctionReturn(0); 1450 } 1451 1452 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1453 { 1454 Mat loc_divudotp; 1455 Vec p,v,vins,quad_vec,*quad_vecs; 1456 ISLocalToGlobalMapping map; 1457 PetscScalar *vals; 1458 const PetscScalar *array; 1459 PetscInt i,maxneighs,maxsize,*gidxs; 1460 PetscInt n_neigh,*neigh,*n_shared,**shared; 1461 PetscMPIInt rank; 1462 PetscErrorCode ierr; 1463 1464 PetscFunctionBegin; 1465 ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1466 ierr = MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1467 if (!maxneighs) { 1468 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1469 *nnsp = NULL; 1470 PetscFunctionReturn(0); 1471 } 1472 maxsize = 0; 1473 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1474 ierr = PetscMalloc2(maxsize,&gidxs,maxsize,&vals);CHKERRQ(ierr); 1475 /* create vectors to hold quadrature weights */ 1476 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1477 if (!transpose) { 1478 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1479 } else { 1480 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1481 } 1482 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1483 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1484 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1485 for (i=0;i<maxneighs;i++) { 1486 ierr = VecLockReadPop(quad_vecs[i]);CHKERRQ(ierr); 1487 } 1488 1489 /* compute local quad vec */ 1490 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1491 if (!transpose) { 1492 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1493 } else { 1494 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1495 } 1496 ierr = VecSet(p,1.);CHKERRQ(ierr); 1497 if (!transpose) { 1498 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1499 } else { 1500 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1501 } 1502 if (vl2l) { 1503 Mat lA; 1504 VecScatter sc; 1505 1506 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1507 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1508 ierr = VecScatterCreate(v,NULL,vins,vl2l,&sc);CHKERRQ(ierr); 1509 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1510 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1511 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1512 } else { 1513 vins = v; 1514 } 1515 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1516 ierr = VecDestroy(&p);CHKERRQ(ierr); 1517 1518 /* insert in global quadrature vecs */ 1519 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1520 for (i=0;i<n_neigh;i++) { 1521 const PetscInt *idxs; 1522 PetscInt idx,nn,j; 1523 1524 idxs = shared[i]; 1525 nn = n_shared[i]; 1526 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1527 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1528 idx = -(idx+1); 1529 ierr = ISLocalToGlobalMappingApply(map,nn,idxs,gidxs);CHKERRQ(ierr); 1530 ierr = VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1531 } 1532 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1533 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1534 if (vl2l) { 1535 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1536 } 1537 ierr = VecDestroy(&v);CHKERRQ(ierr); 1538 ierr = PetscFree2(gidxs,vals);CHKERRQ(ierr); 1539 1540 /* assemble near null space */ 1541 for (i=0;i<maxneighs;i++) { 1542 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1543 } 1544 for (i=0;i<maxneighs;i++) { 1545 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1546 ierr = VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");CHKERRQ(ierr); 1547 ierr = VecLockReadPush(quad_vecs[i]);CHKERRQ(ierr); 1548 } 1549 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1550 PetscFunctionReturn(0); 1551 } 1552 1553 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1554 { 1555 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1556 PetscErrorCode ierr; 1557 1558 PetscFunctionBegin; 1559 if (primalv) { 1560 if (pcbddc->user_primal_vertices_local) { 1561 IS list[2], newp; 1562 1563 list[0] = primalv; 1564 list[1] = pcbddc->user_primal_vertices_local; 1565 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1566 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1567 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1568 pcbddc->user_primal_vertices_local = newp; 1569 } else { 1570 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1571 } 1572 } 1573 PetscFunctionReturn(0); 1574 } 1575 1576 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1577 { 1578 PetscInt f, *comp = (PetscInt *)ctx; 1579 1580 PetscFunctionBegin; 1581 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1582 PetscFunctionReturn(0); 1583 } 1584 1585 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1586 { 1587 PetscErrorCode ierr; 1588 Vec local,global; 1589 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1590 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1591 PetscBool monolithic = PETSC_FALSE; 1592 1593 PetscFunctionBegin; 1594 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1595 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1596 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1597 /* need to convert from global to local topology information and remove references to information in global ordering */ 1598 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1599 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1600 ierr = VecBindToCPU(global,PETSC_TRUE);CHKERRQ(ierr); 1601 ierr = VecBindToCPU(local,PETSC_TRUE);CHKERRQ(ierr); 1602 if (monolithic) { /* just get block size to properly compute vertices */ 1603 if (pcbddc->vertex_size == 1) { 1604 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1605 } 1606 goto boundary; 1607 } 1608 1609 if (pcbddc->user_provided_isfordofs) { 1610 if (pcbddc->n_ISForDofs) { 1611 PetscInt i; 1612 1613 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1614 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1615 PetscInt bs; 1616 1617 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1618 ierr = ISGetBlockSize(pcbddc->ISForDofs[i],&bs);CHKERRQ(ierr); 1619 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1620 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1621 } 1622 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1623 pcbddc->n_ISForDofs = 0; 1624 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1625 } 1626 } else { 1627 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1628 DM dm; 1629 1630 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1631 if (!dm) { 1632 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1633 } 1634 if (dm) { 1635 IS *fields; 1636 PetscInt nf,i; 1637 1638 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1639 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1640 for (i=0;i<nf;i++) { 1641 PetscInt bs; 1642 1643 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1644 ierr = ISGetBlockSize(fields[i],&bs);CHKERRQ(ierr); 1645 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1646 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1647 } 1648 ierr = PetscFree(fields);CHKERRQ(ierr); 1649 pcbddc->n_ISForDofsLocal = nf; 1650 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1651 PetscContainer c; 1652 1653 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1654 if (c) { 1655 MatISLocalFields lf; 1656 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1657 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1658 } else { /* fallback, create the default fields if bs > 1 */ 1659 PetscInt i, n = matis->A->rmap->n; 1660 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1661 if (i > 1) { 1662 pcbddc->n_ISForDofsLocal = i; 1663 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1664 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1665 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1666 } 1667 } 1668 } 1669 } 1670 } else { 1671 PetscInt i; 1672 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1673 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1674 } 1675 } 1676 } 1677 1678 boundary: 1679 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1680 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1681 } else if (pcbddc->DirichletBoundariesLocal) { 1682 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1683 } 1684 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1685 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1686 } else if (pcbddc->NeumannBoundariesLocal) { 1687 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1688 } 1689 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1690 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1691 } 1692 ierr = VecDestroy(&global);CHKERRQ(ierr); 1693 ierr = VecDestroy(&local);CHKERRQ(ierr); 1694 /* detect local disconnected subdomains if requested (use matis->A) */ 1695 if (pcbddc->detect_disconnected) { 1696 IS primalv = NULL; 1697 PetscInt i; 1698 PetscBool filter = pcbddc->detect_disconnected_filter; 1699 1700 for (i=0;i<pcbddc->n_local_subs;i++) { 1701 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1702 } 1703 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1704 ierr = PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1705 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1706 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1707 } 1708 /* early stage corner detection */ 1709 { 1710 DM dm; 1711 1712 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1713 if (!dm) { 1714 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1715 } 1716 if (dm) { 1717 PetscBool isda; 1718 1719 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1720 if (isda) { 1721 ISLocalToGlobalMapping l2l; 1722 IS corners; 1723 Mat lA; 1724 PetscBool gl,lo; 1725 1726 { 1727 Vec cvec; 1728 const PetscScalar *coords; 1729 PetscInt dof,n,cdim; 1730 PetscBool memc = PETSC_TRUE; 1731 1732 ierr = DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 1733 ierr = DMGetCoordinates(dm,&cvec);CHKERRQ(ierr); 1734 ierr = VecGetLocalSize(cvec,&n);CHKERRQ(ierr); 1735 ierr = VecGetBlockSize(cvec,&cdim);CHKERRQ(ierr); 1736 n /= cdim; 1737 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 1738 ierr = PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords);CHKERRQ(ierr); 1739 ierr = VecGetArrayRead(cvec,&coords);CHKERRQ(ierr); 1740 #if defined(PETSC_USE_COMPLEX) 1741 memc = PETSC_FALSE; 1742 #endif 1743 if (dof != 1) memc = PETSC_FALSE; 1744 if (memc) { 1745 ierr = PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof);CHKERRQ(ierr); 1746 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1747 PetscReal *bcoords = pcbddc->mat_graph->coords; 1748 PetscInt i, b, d; 1749 1750 for (i=0;i<n;i++) { 1751 for (b=0;b<dof;b++) { 1752 for (d=0;d<cdim;d++) { 1753 bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]); 1754 } 1755 } 1756 } 1757 } 1758 ierr = VecRestoreArrayRead(cvec,&coords);CHKERRQ(ierr); 1759 pcbddc->mat_graph->cdim = cdim; 1760 pcbddc->mat_graph->cnloc = dof*n; 1761 pcbddc->mat_graph->cloc = PETSC_FALSE; 1762 } 1763 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1764 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1765 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1766 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1767 lo = (PetscBool)(l2l && corners); 1768 ierr = MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1769 if (gl) { /* From PETSc's DMDA */ 1770 const PetscInt *idx; 1771 PetscInt dof,bs,*idxout,n; 1772 1773 ierr = DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 1774 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1775 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1776 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1777 if (bs == dof) { 1778 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1779 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1780 } else { /* the original DMDA local-to-local map have been modified */ 1781 PetscInt i,d; 1782 1783 ierr = PetscMalloc1(dof*n,&idxout);CHKERRQ(ierr); 1784 for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d; 1785 ierr = ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);CHKERRQ(ierr); 1786 1787 bs = 1; 1788 n *= dof; 1789 } 1790 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1791 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1792 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1793 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1794 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1795 pcbddc->corner_selected = PETSC_TRUE; 1796 pcbddc->corner_selection = PETSC_TRUE; 1797 } 1798 if (corners) { 1799 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1800 } 1801 } 1802 } 1803 } 1804 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1805 DM dm; 1806 1807 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1808 if (!dm) { 1809 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1810 } 1811 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1812 Vec vcoords; 1813 PetscSection section; 1814 PetscReal *coords; 1815 PetscInt d,cdim,nl,nf,**ctxs; 1816 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1817 1818 ierr = DMGetCoordinateDim(dm,&cdim);CHKERRQ(ierr); 1819 ierr = DMGetLocalSection(dm,§ion);CHKERRQ(ierr); 1820 ierr = PetscSectionGetNumFields(section,&nf);CHKERRQ(ierr); 1821 ierr = DMCreateGlobalVector(dm,&vcoords);CHKERRQ(ierr); 1822 ierr = VecGetLocalSize(vcoords,&nl);CHKERRQ(ierr); 1823 ierr = PetscMalloc1(nl*cdim,&coords);CHKERRQ(ierr); 1824 ierr = PetscMalloc2(nf,&funcs,nf,&ctxs);CHKERRQ(ierr); 1825 ierr = PetscMalloc1(nf,&ctxs[0]);CHKERRQ(ierr); 1826 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1827 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1828 for (d=0;d<cdim;d++) { 1829 PetscInt i; 1830 const PetscScalar *v; 1831 1832 for (i=0;i<nf;i++) ctxs[i][0] = d; 1833 ierr = DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);CHKERRQ(ierr); 1834 ierr = VecGetArrayRead(vcoords,&v);CHKERRQ(ierr); 1835 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1836 ierr = VecRestoreArrayRead(vcoords,&v);CHKERRQ(ierr); 1837 } 1838 ierr = VecDestroy(&vcoords);CHKERRQ(ierr); 1839 ierr = PCSetCoordinates(pc,cdim,nl,coords);CHKERRQ(ierr); 1840 ierr = PetscFree(coords);CHKERRQ(ierr); 1841 ierr = PetscFree(ctxs[0]);CHKERRQ(ierr); 1842 ierr = PetscFree2(funcs,ctxs);CHKERRQ(ierr); 1843 } 1844 } 1845 PetscFunctionReturn(0); 1846 } 1847 1848 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1849 { 1850 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1851 PetscErrorCode ierr; 1852 IS nis; 1853 const PetscInt *idxs; 1854 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1855 PetscBool *ld; 1856 1857 PetscFunctionBegin; 1858 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1859 if (mop == MPI_LAND) { 1860 /* init rootdata with true */ 1861 ld = (PetscBool*) matis->sf_rootdata; 1862 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1863 } else { 1864 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 1865 } 1866 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 1867 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1868 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1869 ld = (PetscBool*) matis->sf_leafdata; 1870 for (i=0;i<nd;i++) 1871 if (-1 < idxs[i] && idxs[i] < n) 1872 ld[idxs[i]] = PETSC_TRUE; 1873 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1874 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1875 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1876 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1877 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1878 if (mop == MPI_LAND) { 1879 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1880 } else { 1881 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1882 } 1883 for (i=0,nnd=0;i<n;i++) 1884 if (ld[i]) 1885 nidxs[nnd++] = i; 1886 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1887 ierr = ISDestroy(is);CHKERRQ(ierr); 1888 *is = nis; 1889 PetscFunctionReturn(0); 1890 } 1891 1892 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1893 { 1894 PC_IS *pcis = (PC_IS*)(pc->data); 1895 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1896 PetscErrorCode ierr; 1897 1898 PetscFunctionBegin; 1899 if (!pcbddc->benign_have_null) { 1900 PetscFunctionReturn(0); 1901 } 1902 if (pcbddc->ChangeOfBasisMatrix) { 1903 Vec swap; 1904 1905 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1906 swap = pcbddc->work_change; 1907 pcbddc->work_change = r; 1908 r = swap; 1909 } 1910 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1911 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1912 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1913 ierr = KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);CHKERRQ(ierr); 1914 ierr = VecSet(z,0.);CHKERRQ(ierr); 1915 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1916 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1917 if (pcbddc->ChangeOfBasisMatrix) { 1918 pcbddc->work_change = r; 1919 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1920 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1921 } 1922 PetscFunctionReturn(0); 1923 } 1924 1925 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1926 { 1927 PCBDDCBenignMatMult_ctx ctx; 1928 PetscErrorCode ierr; 1929 PetscBool apply_right,apply_left,reset_x; 1930 1931 PetscFunctionBegin; 1932 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1933 if (transpose) { 1934 apply_right = ctx->apply_left; 1935 apply_left = ctx->apply_right; 1936 } else { 1937 apply_right = ctx->apply_right; 1938 apply_left = ctx->apply_left; 1939 } 1940 reset_x = PETSC_FALSE; 1941 if (apply_right) { 1942 const PetscScalar *ax; 1943 PetscInt nl,i; 1944 1945 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1946 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1947 ierr = PetscArraycpy(ctx->work,ax,nl);CHKERRQ(ierr); 1948 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1949 for (i=0;i<ctx->benign_n;i++) { 1950 PetscScalar sum,val; 1951 const PetscInt *idxs; 1952 PetscInt nz,j; 1953 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1954 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1955 sum = 0.; 1956 if (ctx->apply_p0) { 1957 val = ctx->work[idxs[nz-1]]; 1958 for (j=0;j<nz-1;j++) { 1959 sum += ctx->work[idxs[j]]; 1960 ctx->work[idxs[j]] += val; 1961 } 1962 } else { 1963 for (j=0;j<nz-1;j++) { 1964 sum += ctx->work[idxs[j]]; 1965 } 1966 } 1967 ctx->work[idxs[nz-1]] -= sum; 1968 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1969 } 1970 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1971 reset_x = PETSC_TRUE; 1972 } 1973 if (transpose) { 1974 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1975 } else { 1976 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1977 } 1978 if (reset_x) { 1979 ierr = VecResetArray(x);CHKERRQ(ierr); 1980 } 1981 if (apply_left) { 1982 PetscScalar *ay; 1983 PetscInt i; 1984 1985 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1986 for (i=0;i<ctx->benign_n;i++) { 1987 PetscScalar sum,val; 1988 const PetscInt *idxs; 1989 PetscInt nz,j; 1990 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1991 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1992 val = -ay[idxs[nz-1]]; 1993 if (ctx->apply_p0) { 1994 sum = 0.; 1995 for (j=0;j<nz-1;j++) { 1996 sum += ay[idxs[j]]; 1997 ay[idxs[j]] += val; 1998 } 1999 ay[idxs[nz-1]] += sum; 2000 } else { 2001 for (j=0;j<nz-1;j++) { 2002 ay[idxs[j]] += val; 2003 } 2004 ay[idxs[nz-1]] = 0.; 2005 } 2006 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2007 } 2008 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 2009 } 2010 PetscFunctionReturn(0); 2011 } 2012 2013 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2014 { 2015 PetscErrorCode ierr; 2016 2017 PetscFunctionBegin; 2018 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 2019 PetscFunctionReturn(0); 2020 } 2021 2022 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2023 { 2024 PetscErrorCode ierr; 2025 2026 PetscFunctionBegin; 2027 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 2028 PetscFunctionReturn(0); 2029 } 2030 2031 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2032 { 2033 PC_IS *pcis = (PC_IS*)pc->data; 2034 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2035 PCBDDCBenignMatMult_ctx ctx; 2036 PetscErrorCode ierr; 2037 2038 PetscFunctionBegin; 2039 if (!restore) { 2040 Mat A_IB,A_BI; 2041 PetscScalar *work; 2042 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2043 2044 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2045 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2046 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2047 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2048 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2049 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2050 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2051 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2052 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2053 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2054 ctx->apply_left = PETSC_TRUE; 2055 ctx->apply_right = PETSC_FALSE; 2056 ctx->apply_p0 = PETSC_FALSE; 2057 ctx->benign_n = pcbddc->benign_n; 2058 if (reuse) { 2059 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2060 ctx->free = PETSC_FALSE; 2061 } else { /* TODO: could be optimized for successive solves */ 2062 ISLocalToGlobalMapping N_to_D; 2063 PetscInt i; 2064 2065 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2066 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2067 for (i=0;i<pcbddc->benign_n;i++) { 2068 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2069 } 2070 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2071 ctx->free = PETSC_TRUE; 2072 } 2073 ctx->A = pcis->A_IB; 2074 ctx->work = work; 2075 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2076 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2077 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2078 pcis->A_IB = A_IB; 2079 2080 /* A_BI as A_IB^T */ 2081 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2082 pcbddc->benign_original_mat = pcis->A_BI; 2083 pcis->A_BI = A_BI; 2084 } else { 2085 if (!pcbddc->benign_original_mat) { 2086 PetscFunctionReturn(0); 2087 } 2088 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2089 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2090 pcis->A_IB = ctx->A; 2091 ctx->A = NULL; 2092 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2093 pcis->A_BI = pcbddc->benign_original_mat; 2094 pcbddc->benign_original_mat = NULL; 2095 if (ctx->free) { 2096 PetscInt i; 2097 for (i=0;i<ctx->benign_n;i++) { 2098 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2099 } 2100 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2101 } 2102 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2103 ierr = PetscFree(ctx);CHKERRQ(ierr); 2104 } 2105 PetscFunctionReturn(0); 2106 } 2107 2108 /* used just in bddc debug mode */ 2109 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2110 { 2111 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2112 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2113 Mat An; 2114 PetscErrorCode ierr; 2115 2116 PetscFunctionBegin; 2117 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2118 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2119 if (is1) { 2120 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2121 ierr = MatDestroy(&An);CHKERRQ(ierr); 2122 } else { 2123 *B = An; 2124 } 2125 PetscFunctionReturn(0); 2126 } 2127 2128 /* TODO: add reuse flag */ 2129 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2130 { 2131 Mat Bt; 2132 PetscScalar *a,*bdata; 2133 const PetscInt *ii,*ij; 2134 PetscInt m,n,i,nnz,*bii,*bij; 2135 PetscBool flg_row; 2136 PetscErrorCode ierr; 2137 2138 PetscFunctionBegin; 2139 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2140 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2141 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2142 nnz = n; 2143 for (i=0;i<ii[n];i++) { 2144 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2145 } 2146 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2147 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2148 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2149 nnz = 0; 2150 bii[0] = 0; 2151 for (i=0;i<n;i++) { 2152 PetscInt j; 2153 for (j=ii[i];j<ii[i+1];j++) { 2154 PetscScalar entry = a[j]; 2155 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2156 bij[nnz] = ij[j]; 2157 bdata[nnz] = entry; 2158 nnz++; 2159 } 2160 } 2161 bii[i+1] = nnz; 2162 } 2163 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2164 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2165 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2166 { 2167 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2168 b->free_a = PETSC_TRUE; 2169 b->free_ij = PETSC_TRUE; 2170 } 2171 if (*B == A) { 2172 ierr = MatDestroy(&A);CHKERRQ(ierr); 2173 } 2174 *B = Bt; 2175 PetscFunctionReturn(0); 2176 } 2177 2178 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv) 2179 { 2180 Mat B = NULL; 2181 DM dm; 2182 IS is_dummy,*cc_n; 2183 ISLocalToGlobalMapping l2gmap_dummy; 2184 PCBDDCGraph graph; 2185 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2186 PetscInt i,n; 2187 PetscInt *xadj,*adjncy; 2188 PetscBool isplex = PETSC_FALSE; 2189 PetscErrorCode ierr; 2190 2191 PetscFunctionBegin; 2192 if (ncc) *ncc = 0; 2193 if (cc) *cc = NULL; 2194 if (primalv) *primalv = NULL; 2195 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2196 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2197 if (!dm) { 2198 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2199 } 2200 if (dm) { 2201 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2202 } 2203 if (filter) isplex = PETSC_FALSE; 2204 2205 if (isplex) { /* this code has been modified from plexpartition.c */ 2206 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2207 PetscInt *adj = NULL; 2208 IS cellNumbering; 2209 const PetscInt *cellNum; 2210 PetscBool useCone, useClosure; 2211 PetscSection section; 2212 PetscSegBuffer adjBuffer; 2213 PetscSF sfPoint; 2214 PetscErrorCode ierr; 2215 2216 PetscFunctionBegin; 2217 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2218 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2219 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2220 /* Build adjacency graph via a section/segbuffer */ 2221 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2222 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2223 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2224 /* Always use FVM adjacency to create partitioner graph */ 2225 ierr = DMGetBasicAdjacency(dm, &useCone, &useClosure);CHKERRQ(ierr); 2226 ierr = DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);CHKERRQ(ierr); 2227 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2228 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2229 for (n = 0, p = pStart; p < pEnd; p++) { 2230 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2231 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2232 adjSize = PETSC_DETERMINE; 2233 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2234 for (a = 0; a < adjSize; ++a) { 2235 const PetscInt point = adj[a]; 2236 if (pStart <= point && point < pEnd) { 2237 PetscInt *PETSC_RESTRICT pBuf; 2238 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2239 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2240 *pBuf = point; 2241 } 2242 } 2243 n++; 2244 } 2245 ierr = DMSetBasicAdjacency(dm, useCone, useClosure);CHKERRQ(ierr); 2246 /* Derive CSR graph from section/segbuffer */ 2247 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2248 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2249 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2250 for (idx = 0, p = pStart; p < pEnd; p++) { 2251 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2252 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2253 } 2254 xadj[n] = size; 2255 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2256 /* Clean up */ 2257 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2258 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2259 ierr = PetscFree(adj);CHKERRQ(ierr); 2260 graph->xadj = xadj; 2261 graph->adjncy = adjncy; 2262 } else { 2263 Mat A; 2264 PetscBool isseqaij, flg_row; 2265 2266 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2267 if (!A->rmap->N || !A->cmap->N) { 2268 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2269 PetscFunctionReturn(0); 2270 } 2271 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2272 if (!isseqaij && filter) { 2273 PetscBool isseqdense; 2274 2275 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2276 if (!isseqdense) { 2277 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2278 } else { /* TODO: rectangular case and LDA */ 2279 PetscScalar *array; 2280 PetscReal chop=1.e-6; 2281 2282 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2283 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2284 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2285 for (i=0;i<n;i++) { 2286 PetscInt j; 2287 for (j=i+1;j<n;j++) { 2288 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2289 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2290 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2291 } 2292 } 2293 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2294 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2295 } 2296 } else { 2297 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2298 B = A; 2299 } 2300 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2301 2302 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2303 if (filter) { 2304 PetscScalar *data; 2305 PetscInt j,cum; 2306 2307 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2308 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2309 cum = 0; 2310 for (i=0;i<n;i++) { 2311 PetscInt t; 2312 2313 for (j=xadj[i];j<xadj[i+1];j++) { 2314 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2315 continue; 2316 } 2317 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2318 } 2319 t = xadj_filtered[i]; 2320 xadj_filtered[i] = cum; 2321 cum += t; 2322 } 2323 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2324 graph->xadj = xadj_filtered; 2325 graph->adjncy = adjncy_filtered; 2326 } else { 2327 graph->xadj = xadj; 2328 graph->adjncy = adjncy; 2329 } 2330 } 2331 /* compute local connected components using PCBDDCGraph */ 2332 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2333 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2334 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2335 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2336 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2337 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2338 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2339 2340 /* partial clean up */ 2341 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2342 if (B) { 2343 PetscBool flg_row; 2344 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2345 ierr = MatDestroy(&B);CHKERRQ(ierr); 2346 } 2347 if (isplex) { 2348 ierr = PetscFree(xadj);CHKERRQ(ierr); 2349 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2350 } 2351 2352 /* get back data */ 2353 if (isplex) { 2354 if (ncc) *ncc = graph->ncc; 2355 if (cc || primalv) { 2356 Mat A; 2357 PetscBT btv,btvt; 2358 PetscSection subSection; 2359 PetscInt *ids,cum,cump,*cids,*pids; 2360 2361 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2362 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2363 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2364 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2365 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2366 2367 cids[0] = 0; 2368 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2369 PetscInt j; 2370 2371 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2372 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2373 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2374 2375 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2376 for (k = 0; k < 2*size; k += 2) { 2377 PetscInt s, pp, p = closure[k], off, dof, cdof; 2378 2379 ierr = PetscSectionGetConstraintDof(subSection,p,&cdof);CHKERRQ(ierr); 2380 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2381 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2382 for (s = 0; s < dof-cdof; s++) { 2383 if (PetscBTLookupSet(btvt,off+s)) continue; 2384 if (!PetscBTLookup(btv,off+s)) { 2385 ids[cum++] = off+s; 2386 } else { /* cross-vertex */ 2387 pids[cump++] = off+s; 2388 } 2389 } 2390 ierr = DMPlexGetTreeParent(dm,p,&pp,NULL);CHKERRQ(ierr); 2391 if (pp != p) { 2392 ierr = PetscSectionGetConstraintDof(subSection,pp,&cdof);CHKERRQ(ierr); 2393 ierr = PetscSectionGetOffset(subSection,pp,&off);CHKERRQ(ierr); 2394 ierr = PetscSectionGetDof(subSection,pp,&dof);CHKERRQ(ierr); 2395 for (s = 0; s < dof-cdof; s++) { 2396 if (PetscBTLookupSet(btvt,off+s)) continue; 2397 if (!PetscBTLookup(btv,off+s)) { 2398 ids[cum++] = off+s; 2399 } else { /* cross-vertex */ 2400 pids[cump++] = off+s; 2401 } 2402 } 2403 } 2404 } 2405 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2406 } 2407 cids[i+1] = cum; 2408 /* mark dofs as already assigned */ 2409 for (j = cids[i]; j < cids[i+1]; j++) { 2410 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2411 } 2412 } 2413 if (cc) { 2414 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2415 for (i = 0; i < graph->ncc; i++) { 2416 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2417 } 2418 *cc = cc_n; 2419 } 2420 if (primalv) { 2421 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2422 } 2423 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2424 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2425 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2426 } 2427 } else { 2428 if (ncc) *ncc = graph->ncc; 2429 if (cc) { 2430 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2431 for (i=0;i<graph->ncc;i++) { 2432 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2433 } 2434 *cc = cc_n; 2435 } 2436 } 2437 /* clean up graph */ 2438 graph->xadj = 0; 2439 graph->adjncy = 0; 2440 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2441 PetscFunctionReturn(0); 2442 } 2443 2444 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2445 { 2446 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2447 PC_IS* pcis = (PC_IS*)(pc->data); 2448 IS dirIS = NULL; 2449 PetscInt i; 2450 PetscErrorCode ierr; 2451 2452 PetscFunctionBegin; 2453 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2454 if (zerodiag) { 2455 Mat A; 2456 Vec vec3_N; 2457 PetscScalar *vals; 2458 const PetscInt *idxs; 2459 PetscInt nz,*count; 2460 2461 /* p0 */ 2462 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2463 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2464 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2465 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2466 for (i=0;i<nz;i++) vals[i] = 1.; 2467 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2468 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2469 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2470 /* v_I */ 2471 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2472 for (i=0;i<nz;i++) vals[i] = 0.; 2473 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2474 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2475 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2476 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2477 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2478 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2479 if (dirIS) { 2480 PetscInt n; 2481 2482 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2483 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2484 for (i=0;i<n;i++) vals[i] = 0.; 2485 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2486 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2487 } 2488 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2489 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2490 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2491 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2492 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2493 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2494 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2495 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2496 ierr = PetscFree(vals);CHKERRQ(ierr); 2497 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2498 2499 /* there should not be any pressure dofs lying on the interface */ 2500 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2501 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2502 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2503 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2504 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2505 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]); 2506 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2507 ierr = PetscFree(count);CHKERRQ(ierr); 2508 } 2509 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2510 2511 /* check PCBDDCBenignGetOrSetP0 */ 2512 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2513 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2514 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2515 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2516 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2517 for (i=0;i<pcbddc->benign_n;i++) { 2518 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2519 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2520 } 2521 PetscFunctionReturn(0); 2522 } 2523 2524 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2525 { 2526 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2527 IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs; 2528 PetscInt nz,n,benign_n,bsp = 1; 2529 PetscInt *interior_dofs,n_interior_dofs,nneu; 2530 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2531 PetscErrorCode ierr; 2532 2533 PetscFunctionBegin; 2534 if (reuse) goto project_b0; 2535 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2536 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2537 for (n=0;n<pcbddc->benign_n;n++) { 2538 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2539 } 2540 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2541 has_null_pressures = PETSC_TRUE; 2542 have_null = PETSC_TRUE; 2543 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2544 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2545 Checks if all the pressure dofs in each subdomain have a zero diagonal 2546 If not, a change of basis on pressures is not needed 2547 since the local Schur complements are already SPD 2548 */ 2549 if (pcbddc->n_ISForDofsLocal) { 2550 IS iP = NULL; 2551 PetscInt p,*pp; 2552 PetscBool flg; 2553 2554 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp);CHKERRQ(ierr); 2555 n = pcbddc->n_ISForDofsLocal; 2556 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2557 ierr = PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg);CHKERRQ(ierr); 2558 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2559 if (!flg) { 2560 n = 1; 2561 pp[0] = pcbddc->n_ISForDofsLocal-1; 2562 } 2563 2564 bsp = 0; 2565 for (p=0;p<n;p++) { 2566 PetscInt bs; 2567 2568 if (pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]); 2569 ierr = ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);CHKERRQ(ierr); 2570 bsp += bs; 2571 } 2572 ierr = PetscMalloc1(bsp,&bzerodiag);CHKERRQ(ierr); 2573 bsp = 0; 2574 for (p=0;p<n;p++) { 2575 const PetscInt *idxs; 2576 PetscInt b,bs,npl,*bidxs; 2577 2578 ierr = ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);CHKERRQ(ierr); 2579 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl);CHKERRQ(ierr); 2580 ierr = ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);CHKERRQ(ierr); 2581 ierr = PetscMalloc1(npl/bs,&bidxs);CHKERRQ(ierr); 2582 for (b=0;b<bs;b++) { 2583 PetscInt i; 2584 2585 for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b]; 2586 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]);CHKERRQ(ierr); 2587 bsp++; 2588 } 2589 ierr = PetscFree(bidxs);CHKERRQ(ierr); 2590 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);CHKERRQ(ierr); 2591 } 2592 ierr = ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures);CHKERRQ(ierr); 2593 2594 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2595 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2596 if (iP) { 2597 IS newpressures; 2598 2599 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2600 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2601 pressures = newpressures; 2602 } 2603 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2604 if (!sorted) { 2605 ierr = ISSort(pressures);CHKERRQ(ierr); 2606 } 2607 ierr = PetscFree(pp);CHKERRQ(ierr); 2608 } 2609 2610 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2611 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2612 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2613 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2614 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2615 if (!sorted) { 2616 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2617 } 2618 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2619 zerodiag_save = zerodiag; 2620 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2621 if (!nz) { 2622 if (n) have_null = PETSC_FALSE; 2623 has_null_pressures = PETSC_FALSE; 2624 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2625 } 2626 recompute_zerodiag = PETSC_FALSE; 2627 2628 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2629 zerodiag_subs = NULL; 2630 benign_n = 0; 2631 n_interior_dofs = 0; 2632 interior_dofs = NULL; 2633 nneu = 0; 2634 if (pcbddc->NeumannBoundariesLocal) { 2635 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2636 } 2637 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2638 if (checkb) { /* need to compute interior nodes */ 2639 PetscInt n,i,j; 2640 PetscInt n_neigh,*neigh,*n_shared,**shared; 2641 PetscInt *iwork; 2642 2643 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2644 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2645 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2646 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2647 for (i=1;i<n_neigh;i++) 2648 for (j=0;j<n_shared[i];j++) 2649 iwork[shared[i][j]] += 1; 2650 for (i=0;i<n;i++) 2651 if (!iwork[i]) 2652 interior_dofs[n_interior_dofs++] = i; 2653 ierr = PetscFree(iwork);CHKERRQ(ierr); 2654 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2655 } 2656 if (has_null_pressures) { 2657 IS *subs; 2658 PetscInt nsubs,i,j,nl; 2659 const PetscInt *idxs; 2660 PetscScalar *array; 2661 Vec *work; 2662 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2663 2664 subs = pcbddc->local_subs; 2665 nsubs = pcbddc->n_local_subs; 2666 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2667 if (checkb) { 2668 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2669 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2670 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2671 /* work[0] = 1_p */ 2672 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2673 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2674 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2675 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2676 /* work[0] = 1_v */ 2677 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2678 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2679 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2680 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2681 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2682 } 2683 2684 if (nsubs > 1 || bsp > 1) { 2685 IS *is; 2686 PetscInt b,totb; 2687 2688 totb = bsp; 2689 is = bsp > 1 ? bzerodiag : &zerodiag; 2690 nsubs = PetscMax(nsubs,1); 2691 ierr = PetscCalloc1(nsubs*totb,&zerodiag_subs);CHKERRQ(ierr); 2692 for (b=0;b<totb;b++) { 2693 for (i=0;i<nsubs;i++) { 2694 ISLocalToGlobalMapping l2g; 2695 IS t_zerodiag_subs; 2696 PetscInt nl; 2697 2698 if (subs) { 2699 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2700 } else { 2701 IS tis; 2702 2703 ierr = MatGetLocalSize(pcbddc->local_mat,&nl,NULL);CHKERRQ(ierr); 2704 ierr = ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis);CHKERRQ(ierr); 2705 ierr = ISLocalToGlobalMappingCreateIS(tis,&l2g);CHKERRQ(ierr); 2706 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2707 } 2708 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs);CHKERRQ(ierr); 2709 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2710 if (nl) { 2711 PetscBool valid = PETSC_TRUE; 2712 2713 if (checkb) { 2714 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2715 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2716 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2717 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2718 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2719 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2720 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2721 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2722 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2723 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2724 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2725 for (j=0;j<n_interior_dofs;j++) { 2726 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2727 valid = PETSC_FALSE; 2728 break; 2729 } 2730 } 2731 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2732 } 2733 if (valid && nneu) { 2734 const PetscInt *idxs; 2735 PetscInt nzb; 2736 2737 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2738 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2739 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2740 if (nzb) valid = PETSC_FALSE; 2741 } 2742 if (valid && pressures) { 2743 IS t_pressure_subs,tmp; 2744 PetscInt i1,i2; 2745 2746 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2747 ierr = ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp);CHKERRQ(ierr); 2748 ierr = ISGetLocalSize(tmp,&i1);CHKERRQ(ierr); 2749 ierr = ISGetLocalSize(t_zerodiag_subs,&i2);CHKERRQ(ierr); 2750 if (i2 != i1) valid = PETSC_FALSE; 2751 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2752 ierr = ISDestroy(&tmp);CHKERRQ(ierr); 2753 } 2754 if (valid) { 2755 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]);CHKERRQ(ierr); 2756 benign_n++; 2757 } else recompute_zerodiag = PETSC_TRUE; 2758 } 2759 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2760 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2761 } 2762 } 2763 } else { /* there's just one subdomain (or zero if they have not been detected */ 2764 PetscBool valid = PETSC_TRUE; 2765 2766 if (nneu) valid = PETSC_FALSE; 2767 if (valid && pressures) { 2768 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2769 } 2770 if (valid && checkb) { 2771 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2772 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2773 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2774 for (j=0;j<n_interior_dofs;j++) { 2775 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2776 valid = PETSC_FALSE; 2777 break; 2778 } 2779 } 2780 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2781 } 2782 if (valid) { 2783 benign_n = 1; 2784 ierr = PetscMalloc1(benign_n,&zerodiag_subs);CHKERRQ(ierr); 2785 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2786 zerodiag_subs[0] = zerodiag; 2787 } 2788 } 2789 if (checkb) { 2790 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2791 } 2792 } 2793 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2794 2795 if (!benign_n) { 2796 PetscInt n; 2797 2798 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2799 recompute_zerodiag = PETSC_FALSE; 2800 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2801 if (n) have_null = PETSC_FALSE; 2802 } 2803 2804 /* final check for null pressures */ 2805 if (zerodiag && pressures) { 2806 ierr = ISEqual(pressures,zerodiag,&have_null);CHKERRQ(ierr); 2807 } 2808 2809 if (recompute_zerodiag) { 2810 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2811 if (benign_n == 1) { 2812 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2813 zerodiag = zerodiag_subs[0]; 2814 } else { 2815 PetscInt i,nzn,*new_idxs; 2816 2817 nzn = 0; 2818 for (i=0;i<benign_n;i++) { 2819 PetscInt ns; 2820 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2821 nzn += ns; 2822 } 2823 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2824 nzn = 0; 2825 for (i=0;i<benign_n;i++) { 2826 PetscInt ns,*idxs; 2827 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2828 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2829 ierr = PetscArraycpy(new_idxs+nzn,idxs,ns);CHKERRQ(ierr); 2830 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2831 nzn += ns; 2832 } 2833 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2834 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2835 } 2836 have_null = PETSC_FALSE; 2837 } 2838 2839 /* determines if the coarse solver will be singular or not */ 2840 ierr = MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2841 2842 /* Prepare matrix to compute no-net-flux */ 2843 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2844 Mat A,loc_divudotp; 2845 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2846 IS row,col,isused = NULL; 2847 PetscInt M,N,n,st,n_isused; 2848 2849 if (pressures) { 2850 isused = pressures; 2851 } else { 2852 isused = zerodiag_save; 2853 } 2854 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2855 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2856 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2857 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2858 n_isused = 0; 2859 if (isused) { 2860 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2861 } 2862 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2863 st = st-n_isused; 2864 if (n) { 2865 const PetscInt *gidxs; 2866 2867 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2868 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2869 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2870 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2871 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2872 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2873 } else { 2874 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2875 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2876 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2877 } 2878 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2879 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2880 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2881 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2882 ierr = ISDestroy(&row);CHKERRQ(ierr); 2883 ierr = ISDestroy(&col);CHKERRQ(ierr); 2884 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2885 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2886 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2887 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2888 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2889 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2890 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2891 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2892 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2893 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2894 } 2895 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2896 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2897 if (bzerodiag) { 2898 PetscInt i; 2899 2900 for (i=0;i<bsp;i++) { 2901 ierr = ISDestroy(&bzerodiag[i]);CHKERRQ(ierr); 2902 } 2903 ierr = PetscFree(bzerodiag);CHKERRQ(ierr); 2904 } 2905 pcbddc->benign_n = benign_n; 2906 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2907 2908 /* determines if the problem has subdomains with 0 pressure block */ 2909 have_null = (PetscBool)(!!pcbddc->benign_n); 2910 ierr = MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2911 2912 project_b0: 2913 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2914 /* change of basis and p0 dofs */ 2915 if (pcbddc->benign_n) { 2916 PetscInt i,s,*nnz; 2917 2918 /* local change of basis for pressures */ 2919 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2920 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2921 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2922 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2923 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2924 for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */ 2925 for (i=0;i<pcbddc->benign_n;i++) { 2926 const PetscInt *idxs; 2927 PetscInt nzs,j; 2928 2929 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2930 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2931 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2932 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2933 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2934 } 2935 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2936 ierr = MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2937 ierr = PetscFree(nnz);CHKERRQ(ierr); 2938 /* set identity by default */ 2939 for (i=0;i<n;i++) { 2940 ierr = MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES);CHKERRQ(ierr); 2941 } 2942 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2943 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2944 /* set change on pressures */ 2945 for (s=0;s<pcbddc->benign_n;s++) { 2946 PetscScalar *array; 2947 const PetscInt *idxs; 2948 PetscInt nzs; 2949 2950 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2951 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2952 for (i=0;i<nzs-1;i++) { 2953 PetscScalar vals[2]; 2954 PetscInt cols[2]; 2955 2956 cols[0] = idxs[i]; 2957 cols[1] = idxs[nzs-1]; 2958 vals[0] = 1.; 2959 vals[1] = 1.; 2960 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2961 } 2962 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2963 for (i=0;i<nzs-1;i++) array[i] = -1.; 2964 array[nzs-1] = 1.; 2965 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2966 /* store local idxs for p0 */ 2967 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2968 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2969 ierr = PetscFree(array);CHKERRQ(ierr); 2970 } 2971 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2972 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2973 2974 /* project if needed */ 2975 if (pcbddc->benign_change_explicit) { 2976 Mat M; 2977 2978 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2979 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2980 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2981 ierr = MatDestroy(&M);CHKERRQ(ierr); 2982 } 2983 /* store global idxs for p0 */ 2984 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2985 } 2986 *zerodiaglocal = zerodiag; 2987 PetscFunctionReturn(0); 2988 } 2989 2990 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2991 { 2992 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2993 PetscScalar *array; 2994 PetscErrorCode ierr; 2995 2996 PetscFunctionBegin; 2997 if (!pcbddc->benign_sf) { 2998 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2999 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 3000 } 3001 if (get) { 3002 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 3003 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 3004 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 3005 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 3006 } else { 3007 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 3008 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 3009 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 3010 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 3011 } 3012 PetscFunctionReturn(0); 3013 } 3014 3015 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3016 { 3017 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3018 PetscErrorCode ierr; 3019 3020 PetscFunctionBegin; 3021 /* TODO: add error checking 3022 - avoid nested pop (or push) calls. 3023 - cannot push before pop. 3024 - cannot call this if pcbddc->local_mat is NULL 3025 */ 3026 if (!pcbddc->benign_n) { 3027 PetscFunctionReturn(0); 3028 } 3029 if (pop) { 3030 if (pcbddc->benign_change_explicit) { 3031 IS is_p0; 3032 MatReuse reuse; 3033 3034 /* extract B_0 */ 3035 reuse = MAT_INITIAL_MATRIX; 3036 if (pcbddc->benign_B0) { 3037 reuse = MAT_REUSE_MATRIX; 3038 } 3039 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 3040 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 3041 /* remove rows and cols from local problem */ 3042 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 3043 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3044 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 3045 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3046 } else { 3047 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3048 PetscScalar *vals; 3049 PetscInt i,n,*idxs_ins; 3050 3051 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 3052 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 3053 if (!pcbddc->benign_B0) { 3054 PetscInt *nnz; 3055 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 3056 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 3057 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 3058 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 3059 for (i=0;i<pcbddc->benign_n;i++) { 3060 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 3061 nnz[i] = n - nnz[i]; 3062 } 3063 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 3064 ierr = MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3065 ierr = PetscFree(nnz);CHKERRQ(ierr); 3066 } 3067 3068 for (i=0;i<pcbddc->benign_n;i++) { 3069 PetscScalar *array; 3070 PetscInt *idxs,j,nz,cum; 3071 3072 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 3073 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3074 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3075 for (j=0;j<nz;j++) vals[j] = 1.; 3076 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3077 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 3078 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 3079 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 3080 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 3081 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 3082 cum = 0; 3083 for (j=0;j<n;j++) { 3084 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3085 vals[cum] = array[j]; 3086 idxs_ins[cum] = j; 3087 cum++; 3088 } 3089 } 3090 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 3091 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 3092 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3093 } 3094 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3095 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3096 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 3097 } 3098 } else { /* push */ 3099 if (pcbddc->benign_change_explicit) { 3100 PetscInt i; 3101 3102 for (i=0;i<pcbddc->benign_n;i++) { 3103 PetscScalar *B0_vals; 3104 PetscInt *B0_cols,B0_ncol; 3105 3106 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3107 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3108 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3109 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 3110 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3111 } 3112 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3113 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3114 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!"); 3115 } 3116 PetscFunctionReturn(0); 3117 } 3118 3119 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3120 { 3121 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3122 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3123 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3124 PetscBLASInt *B_iwork,*B_ifail; 3125 PetscScalar *work,lwork; 3126 PetscScalar *St,*S,*eigv; 3127 PetscScalar *Sarray,*Starray; 3128 PetscReal *eigs,thresh,lthresh,uthresh; 3129 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3130 PetscBool allocated_S_St; 3131 #if defined(PETSC_USE_COMPLEX) 3132 PetscReal *rwork; 3133 #endif 3134 PetscErrorCode ierr; 3135 3136 PetscFunctionBegin; 3137 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3138 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3139 if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3140 ierr = PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3141 3142 if (pcbddc->dbg_flag) { 3143 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3144 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3145 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3146 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3147 } 3148 3149 if (pcbddc->dbg_flag) { 3150 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);CHKERRQ(ierr); 3151 } 3152 3153 /* max size of subsets */ 3154 mss = 0; 3155 for (i=0;i<sub_schurs->n_subs;i++) { 3156 PetscInt subset_size; 3157 3158 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3159 mss = PetscMax(mss,subset_size); 3160 } 3161 3162 /* min/max and threshold */ 3163 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3164 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3165 nmax = PetscMax(nmin,nmax); 3166 allocated_S_St = PETSC_FALSE; 3167 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3168 allocated_S_St = PETSC_TRUE; 3169 } 3170 3171 /* allocate lapack workspace */ 3172 cum = cum2 = 0; 3173 maxneigs = 0; 3174 for (i=0;i<sub_schurs->n_subs;i++) { 3175 PetscInt n,subset_size; 3176 3177 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3178 n = PetscMin(subset_size,nmax); 3179 cum += subset_size; 3180 cum2 += subset_size*n; 3181 maxneigs = PetscMax(maxneigs,n); 3182 } 3183 lwork = 0; 3184 if (mss) { 3185 if (sub_schurs->is_symmetric) { 3186 PetscScalar sdummy = 0.; 3187 PetscBLASInt B_itype = 1; 3188 PetscBLASInt B_N = mss, idummy = 0; 3189 PetscReal rdummy = 0.,zero = 0.0; 3190 PetscReal eps = 0.0; /* dlamch? */ 3191 3192 B_lwork = -1; 3193 /* some implementations may complain about NULL pointers, even if we are querying */ 3194 S = &sdummy; 3195 St = &sdummy; 3196 eigs = &rdummy; 3197 eigv = &sdummy; 3198 B_iwork = &idummy; 3199 B_ifail = &idummy; 3200 #if defined(PETSC_USE_COMPLEX) 3201 rwork = &rdummy; 3202 #endif 3203 thresh = 1.0; 3204 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3205 #if defined(PETSC_USE_COMPLEX) 3206 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3207 #else 3208 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3209 #endif 3210 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3211 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3212 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3213 } 3214 3215 nv = 0; 3216 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3217 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3218 } 3219 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3220 if (allocated_S_St) { 3221 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3222 } 3223 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3224 #if defined(PETSC_USE_COMPLEX) 3225 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3226 #endif 3227 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3228 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3229 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3230 nv+cum,&pcbddc->adaptive_constraints_idxs, 3231 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3232 ierr = PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs);CHKERRQ(ierr); 3233 3234 maxneigs = 0; 3235 cum = cumarray = 0; 3236 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3237 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3238 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3239 const PetscInt *idxs; 3240 3241 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3242 for (cum=0;cum<nv;cum++) { 3243 pcbddc->adaptive_constraints_n[cum] = 1; 3244 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3245 pcbddc->adaptive_constraints_data[cum] = 1.0; 3246 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3247 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3248 } 3249 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3250 } 3251 3252 if (mss) { /* multilevel */ 3253 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3254 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3255 } 3256 3257 lthresh = pcbddc->adaptive_threshold[0]; 3258 uthresh = pcbddc->adaptive_threshold[1]; 3259 for (i=0;i<sub_schurs->n_subs;i++) { 3260 const PetscInt *idxs; 3261 PetscReal upper,lower; 3262 PetscInt j,subset_size,eigs_start = 0; 3263 PetscBLASInt B_N; 3264 PetscBool same_data = PETSC_FALSE; 3265 PetscBool scal = PETSC_FALSE; 3266 3267 if (pcbddc->use_deluxe_scaling) { 3268 upper = PETSC_MAX_REAL; 3269 lower = uthresh; 3270 } else { 3271 if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3272 upper = 1./uthresh; 3273 lower = 0.; 3274 } 3275 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3276 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3277 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3278 /* this is experimental: we assume the dofs have been properly grouped to have 3279 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3280 if (!sub_schurs->is_posdef) { 3281 Mat T; 3282 3283 for (j=0;j<subset_size;j++) { 3284 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3285 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);CHKERRQ(ierr); 3286 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3287 ierr = MatDestroy(&T);CHKERRQ(ierr); 3288 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);CHKERRQ(ierr); 3289 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3290 ierr = MatDestroy(&T);CHKERRQ(ierr); 3291 if (sub_schurs->change_primal_sub) { 3292 PetscInt nz,k; 3293 const PetscInt *idxs; 3294 3295 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);CHKERRQ(ierr); 3296 ierr = ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3297 for (k=0;k<nz;k++) { 3298 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3299 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3300 } 3301 ierr = ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3302 } 3303 scal = PETSC_TRUE; 3304 break; 3305 } 3306 } 3307 } 3308 3309 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3310 if (sub_schurs->is_symmetric) { 3311 PetscInt j,k; 3312 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3313 ierr = PetscArrayzero(S,subset_size*subset_size);CHKERRQ(ierr); 3314 ierr = PetscArrayzero(St,subset_size*subset_size);CHKERRQ(ierr); 3315 } 3316 for (j=0;j<subset_size;j++) { 3317 for (k=j;k<subset_size;k++) { 3318 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3319 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3320 } 3321 } 3322 } else { 3323 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3324 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3325 } 3326 } else { 3327 S = Sarray + cumarray; 3328 St = Starray + cumarray; 3329 } 3330 /* see if we can save some work */ 3331 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3332 ierr = PetscArraycmp(S,St,subset_size*subset_size,&same_data);CHKERRQ(ierr); 3333 } 3334 3335 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3336 B_neigs = 0; 3337 } else { 3338 if (sub_schurs->is_symmetric) { 3339 PetscBLASInt B_itype = 1; 3340 PetscBLASInt B_IL, B_IU; 3341 PetscReal eps = -1.0; /* dlamch? */ 3342 PetscInt nmin_s; 3343 PetscBool compute_range; 3344 3345 B_neigs = 0; 3346 compute_range = (PetscBool)!same_data; 3347 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3348 3349 if (pcbddc->dbg_flag) { 3350 PetscInt nc = 0; 3351 3352 if (sub_schurs->change_primal_sub) { 3353 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);CHKERRQ(ierr); 3354 } 3355 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);CHKERRQ(ierr); 3356 } 3357 3358 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3359 if (compute_range) { 3360 3361 /* ask for eigenvalues larger than thresh */ 3362 if (sub_schurs->is_posdef) { 3363 #if defined(PETSC_USE_COMPLEX) 3364 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3365 #else 3366 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3367 #endif 3368 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3369 } else { /* no theory so far, but it works nicely */ 3370 PetscInt recipe = 0,recipe_m = 1; 3371 PetscReal bb[2]; 3372 3373 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);CHKERRQ(ierr); 3374 switch (recipe) { 3375 case 0: 3376 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3377 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3378 #if defined(PETSC_USE_COMPLEX) 3379 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3380 #else 3381 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3382 #endif 3383 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3384 break; 3385 case 1: 3386 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3387 #if defined(PETSC_USE_COMPLEX) 3388 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3389 #else 3390 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3391 #endif 3392 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3393 if (!scal) { 3394 PetscBLASInt B_neigs2 = 0; 3395 3396 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3397 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3398 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3399 #if defined(PETSC_USE_COMPLEX) 3400 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3401 #else 3402 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3403 #endif 3404 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3405 B_neigs += B_neigs2; 3406 } 3407 break; 3408 case 2: 3409 if (scal) { 3410 bb[0] = PETSC_MIN_REAL; 3411 bb[1] = 0; 3412 #if defined(PETSC_USE_COMPLEX) 3413 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3414 #else 3415 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3416 #endif 3417 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3418 } else { 3419 PetscBLASInt B_neigs2 = 0; 3420 PetscBool import = PETSC_FALSE; 3421 3422 lthresh = PetscMax(lthresh,0.0); 3423 if (lthresh > 0.0) { 3424 bb[0] = PETSC_MIN_REAL; 3425 bb[1] = lthresh*lthresh; 3426 3427 import = PETSC_TRUE; 3428 #if defined(PETSC_USE_COMPLEX) 3429 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3430 #else 3431 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3432 #endif 3433 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3434 } 3435 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3436 bb[1] = PETSC_MAX_REAL; 3437 if (import) { 3438 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3439 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3440 } 3441 #if defined(PETSC_USE_COMPLEX) 3442 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3443 #else 3444 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3445 #endif 3446 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3447 B_neigs += B_neigs2; 3448 } 3449 break; 3450 case 3: 3451 if (scal) { 3452 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);CHKERRQ(ierr); 3453 } else { 3454 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);CHKERRQ(ierr); 3455 } 3456 if (!scal) { 3457 bb[0] = uthresh; 3458 bb[1] = PETSC_MAX_REAL; 3459 #if defined(PETSC_USE_COMPLEX) 3460 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3461 #else 3462 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3463 #endif 3464 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3465 } 3466 if (recipe_m > 0 && B_N - B_neigs > 0) { 3467 PetscBLASInt B_neigs2 = 0; 3468 3469 B_IL = 1; 3470 ierr = PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);CHKERRQ(ierr); 3471 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3472 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3473 #if defined(PETSC_USE_COMPLEX) 3474 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3475 #else 3476 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3477 #endif 3478 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3479 B_neigs += B_neigs2; 3480 } 3481 break; 3482 case 4: 3483 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3484 #if defined(PETSC_USE_COMPLEX) 3485 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3486 #else 3487 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3488 #endif 3489 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3490 { 3491 PetscBLASInt B_neigs2 = 0; 3492 3493 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3494 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3495 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3496 #if defined(PETSC_USE_COMPLEX) 3497 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3498 #else 3499 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3500 #endif 3501 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3502 B_neigs += B_neigs2; 3503 } 3504 break; 3505 case 5: /* same as before: first compute all eigenvalues, then filter */ 3506 #if defined(PETSC_USE_COMPLEX) 3507 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3508 #else 3509 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3510 #endif 3511 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3512 { 3513 PetscInt e,k,ne; 3514 for (e=0,ne=0;e<B_neigs;e++) { 3515 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3516 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3517 eigs[ne] = eigs[e]; 3518 ne++; 3519 } 3520 } 3521 ierr = PetscArraycpy(eigv,S,B_N*ne);CHKERRQ(ierr); 3522 B_neigs = ne; 3523 } 3524 break; 3525 default: 3526 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3527 break; 3528 } 3529 } 3530 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3531 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3532 B_IL = 1; 3533 #if defined(PETSC_USE_COMPLEX) 3534 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3535 #else 3536 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3537 #endif 3538 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3539 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3540 PetscInt k; 3541 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3542 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3543 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3544 nmin = nmax; 3545 ierr = PetscArrayzero(eigv,subset_size*nmax);CHKERRQ(ierr); 3546 for (k=0;k<nmax;k++) { 3547 eigs[k] = 1./PETSC_SMALL; 3548 eigv[k*(subset_size+1)] = 1.0; 3549 } 3550 } 3551 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3552 if (B_ierr) { 3553 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3554 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3555 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3556 } 3557 3558 if (B_neigs > nmax) { 3559 if (pcbddc->dbg_flag) { 3560 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax);CHKERRQ(ierr); 3561 } 3562 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3563 B_neigs = nmax; 3564 } 3565 3566 nmin_s = PetscMin(nmin,B_N); 3567 if (B_neigs < nmin_s) { 3568 PetscBLASInt B_neigs2 = 0; 3569 3570 if (pcbddc->use_deluxe_scaling) { 3571 if (scal) { 3572 B_IU = nmin_s; 3573 B_IL = B_neigs + 1; 3574 } else { 3575 B_IL = B_N - nmin_s + 1; 3576 B_IU = B_N - B_neigs; 3577 } 3578 } else { 3579 B_IL = B_neigs + 1; 3580 B_IU = nmin_s; 3581 } 3582 if (pcbddc->dbg_flag) { 3583 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);CHKERRQ(ierr); 3584 } 3585 if (sub_schurs->is_symmetric) { 3586 PetscInt j,k; 3587 for (j=0;j<subset_size;j++) { 3588 for (k=j;k<subset_size;k++) { 3589 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3590 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3591 } 3592 } 3593 } else { 3594 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3595 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3596 } 3597 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3598 #if defined(PETSC_USE_COMPLEX) 3599 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3600 #else 3601 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3602 #endif 3603 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3604 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3605 B_neigs += B_neigs2; 3606 } 3607 if (B_ierr) { 3608 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3609 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3610 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3611 } 3612 if (pcbddc->dbg_flag) { 3613 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3614 for (j=0;j<B_neigs;j++) { 3615 if (eigs[j] == 0.0) { 3616 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3617 } else { 3618 if (pcbddc->use_deluxe_scaling) { 3619 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3620 } else { 3621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3622 } 3623 } 3624 } 3625 } 3626 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3627 } 3628 /* change the basis back to the original one */ 3629 if (sub_schurs->change) { 3630 Mat change,phi,phit; 3631 3632 if (pcbddc->dbg_flag > 2) { 3633 PetscInt ii; 3634 for (ii=0;ii<B_neigs;ii++) { 3635 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3636 for (j=0;j<B_N;j++) { 3637 #if defined(PETSC_USE_COMPLEX) 3638 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3639 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3640 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3641 #else 3642 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3643 #endif 3644 } 3645 } 3646 } 3647 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3648 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3649 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3650 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3651 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3652 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3653 } 3654 maxneigs = PetscMax(B_neigs,maxneigs); 3655 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3656 if (B_neigs) { 3657 ierr = PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size);CHKERRQ(ierr); 3658 3659 if (pcbddc->dbg_flag > 1) { 3660 PetscInt ii; 3661 for (ii=0;ii<B_neigs;ii++) { 3662 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3663 for (j=0;j<B_N;j++) { 3664 #if defined(PETSC_USE_COMPLEX) 3665 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3666 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3667 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3668 #else 3669 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3670 #endif 3671 } 3672 } 3673 } 3674 ierr = PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size);CHKERRQ(ierr); 3675 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3676 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3677 cum++; 3678 } 3679 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3680 /* shift for next computation */ 3681 cumarray += subset_size*subset_size; 3682 } 3683 if (pcbddc->dbg_flag) { 3684 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3685 } 3686 3687 if (mss) { 3688 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3689 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3690 /* destroy matrices (junk) */ 3691 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3692 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3693 } 3694 if (allocated_S_St) { 3695 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3696 } 3697 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3698 #if defined(PETSC_USE_COMPLEX) 3699 ierr = PetscFree(rwork);CHKERRQ(ierr); 3700 #endif 3701 if (pcbddc->dbg_flag) { 3702 PetscInt maxneigs_r; 3703 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3704 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);CHKERRQ(ierr); 3705 } 3706 ierr = PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3707 PetscFunctionReturn(0); 3708 } 3709 3710 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3711 { 3712 PetscScalar *coarse_submat_vals; 3713 PetscErrorCode ierr; 3714 3715 PetscFunctionBegin; 3716 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3717 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3718 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3719 3720 /* Setup local neumann solver ksp_R */ 3721 /* PCBDDCSetUpLocalScatters should be called first! */ 3722 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3723 3724 /* 3725 Setup local correction and local part of coarse basis. 3726 Gives back the dense local part of the coarse matrix in column major ordering 3727 */ 3728 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3729 3730 /* Compute total number of coarse nodes and setup coarse solver */ 3731 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3732 3733 /* free */ 3734 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3735 PetscFunctionReturn(0); 3736 } 3737 3738 PetscErrorCode PCBDDCResetCustomization(PC pc) 3739 { 3740 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3741 PetscErrorCode ierr; 3742 3743 PetscFunctionBegin; 3744 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3745 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3746 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3747 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3748 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3749 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3750 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3751 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3752 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3753 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3754 PetscFunctionReturn(0); 3755 } 3756 3757 PetscErrorCode PCBDDCResetTopography(PC pc) 3758 { 3759 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3760 PetscInt i; 3761 PetscErrorCode ierr; 3762 3763 PetscFunctionBegin; 3764 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3765 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3766 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3767 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3768 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3769 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3770 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3771 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3772 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3773 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3774 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3775 for (i=0;i<pcbddc->n_local_subs;i++) { 3776 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3777 } 3778 pcbddc->n_local_subs = 0; 3779 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3780 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3781 pcbddc->graphanalyzed = PETSC_FALSE; 3782 pcbddc->recompute_topography = PETSC_TRUE; 3783 pcbddc->corner_selected = PETSC_FALSE; 3784 PetscFunctionReturn(0); 3785 } 3786 3787 PetscErrorCode PCBDDCResetSolvers(PC pc) 3788 { 3789 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3790 PetscErrorCode ierr; 3791 3792 PetscFunctionBegin; 3793 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3794 if (pcbddc->coarse_phi_B) { 3795 PetscScalar *array; 3796 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3797 ierr = PetscFree(array);CHKERRQ(ierr); 3798 } 3799 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3800 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3801 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3802 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3803 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3804 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3805 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3806 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3807 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3808 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3809 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3810 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3811 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3812 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3813 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3814 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3815 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3816 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3817 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3818 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3819 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3820 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3821 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3822 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3823 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3824 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3825 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3826 if (pcbddc->benign_zerodiag_subs) { 3827 PetscInt i; 3828 for (i=0;i<pcbddc->benign_n;i++) { 3829 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3830 } 3831 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3832 } 3833 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3834 PetscFunctionReturn(0); 3835 } 3836 3837 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3838 { 3839 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3840 PC_IS *pcis = (PC_IS*)pc->data; 3841 VecType impVecType; 3842 PetscInt n_constraints,n_R,old_size; 3843 PetscErrorCode ierr; 3844 3845 PetscFunctionBegin; 3846 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3847 n_R = pcis->n - pcbddc->n_vertices; 3848 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3849 /* local work vectors (try to avoid unneeded work)*/ 3850 /* R nodes */ 3851 old_size = -1; 3852 if (pcbddc->vec1_R) { 3853 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3854 } 3855 if (n_R != old_size) { 3856 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3857 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3858 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3859 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3860 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3861 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3862 } 3863 /* local primal dofs */ 3864 old_size = -1; 3865 if (pcbddc->vec1_P) { 3866 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3867 } 3868 if (pcbddc->local_primal_size != old_size) { 3869 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3870 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3871 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3872 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3873 } 3874 /* local explicit constraints */ 3875 old_size = -1; 3876 if (pcbddc->vec1_C) { 3877 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3878 } 3879 if (n_constraints && n_constraints != old_size) { 3880 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3881 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3882 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3883 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3884 } 3885 PetscFunctionReturn(0); 3886 } 3887 3888 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3889 { 3890 PetscErrorCode ierr; 3891 /* pointers to pcis and pcbddc */ 3892 PC_IS* pcis = (PC_IS*)pc->data; 3893 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3894 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3895 /* submatrices of local problem */ 3896 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3897 /* submatrices of local coarse problem */ 3898 Mat S_VV,S_CV,S_VC,S_CC; 3899 /* working matrices */ 3900 Mat C_CR; 3901 /* additional working stuff */ 3902 PC pc_R; 3903 Mat F,Brhs = NULL; 3904 Vec dummy_vec; 3905 PetscBool isLU,isCHOL,need_benign_correction,sparserhs; 3906 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3907 PetscScalar *work; 3908 PetscInt *idx_V_B; 3909 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3910 PetscInt i,n_R,n_D,n_B; 3911 PetscScalar one=1.0,m_one=-1.0; 3912 3913 PetscFunctionBegin; 3914 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3915 ierr = PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3916 3917 /* Set Non-overlapping dimensions */ 3918 n_vertices = pcbddc->n_vertices; 3919 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3920 n_B = pcis->n_B; 3921 n_D = pcis->n - n_B; 3922 n_R = pcis->n - n_vertices; 3923 3924 /* vertices in boundary numbering */ 3925 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3926 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3927 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i); 3928 3929 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3930 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3931 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3932 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3933 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3934 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3935 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3936 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3937 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3938 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3939 3940 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3941 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3942 ierr = PCSetUp(pc_R);CHKERRQ(ierr); 3943 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3944 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3945 lda_rhs = n_R; 3946 need_benign_correction = PETSC_FALSE; 3947 if (isLU || isCHOL) { 3948 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3949 } else if (sub_schurs && sub_schurs->reuse_solver) { 3950 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3951 MatFactorType type; 3952 3953 F = reuse_solver->F; 3954 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3955 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3956 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3957 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3958 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3959 } else F = NULL; 3960 3961 /* determine if we can use a sparse right-hand side */ 3962 sparserhs = PETSC_FALSE; 3963 if (F) { 3964 MatSolverType solver; 3965 3966 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3967 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3968 } 3969 3970 /* allocate workspace */ 3971 n = 0; 3972 if (n_constraints) { 3973 n += lda_rhs*n_constraints; 3974 } 3975 if (n_vertices) { 3976 n = PetscMax(2*lda_rhs*n_vertices,n); 3977 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3978 } 3979 if (!pcbddc->symmetric_primal) { 3980 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3981 } 3982 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3983 3984 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3985 dummy_vec = NULL; 3986 if (need_benign_correction && lda_rhs != n_R && F) { 3987 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);CHKERRQ(ierr); 3988 ierr = VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);CHKERRQ(ierr); 3989 ierr = VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);CHKERRQ(ierr); 3990 } 3991 3992 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3993 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3994 3995 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3996 if (n_constraints) { 3997 Mat M3,C_B; 3998 IS is_aux; 3999 PetscScalar *array,*array2; 4000 4001 /* Extract constraints on R nodes: C_{CR} */ 4002 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 4003 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 4004 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4005 4006 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4007 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4008 if (!sparserhs) { 4009 ierr = PetscArrayzero(work,lda_rhs*n_constraints);CHKERRQ(ierr); 4010 for (i=0;i<n_constraints;i++) { 4011 const PetscScalar *row_cmat_values; 4012 const PetscInt *row_cmat_indices; 4013 PetscInt size_of_constraint,j; 4014 4015 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 4016 for (j=0;j<size_of_constraint;j++) { 4017 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 4018 } 4019 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 4020 } 4021 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 4022 } else { 4023 Mat tC_CR; 4024 4025 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 4026 if (lda_rhs != n_R) { 4027 PetscScalar *aa; 4028 PetscInt r,*ii,*jj; 4029 PetscBool done; 4030 4031 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4032 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4033 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 4034 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 4035 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4036 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4037 } else { 4038 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 4039 tC_CR = C_CR; 4040 } 4041 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 4042 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 4043 } 4044 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 4045 if (F) { 4046 if (need_benign_correction) { 4047 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4048 4049 /* rhs is already zero on interior dofs, no need to change the rhs */ 4050 ierr = PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n);CHKERRQ(ierr); 4051 } 4052 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 4053 if (need_benign_correction) { 4054 PetscScalar *marr; 4055 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4056 4057 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4058 if (lda_rhs != n_R) { 4059 for (i=0;i<n_constraints;i++) { 4060 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4061 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4062 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4063 } 4064 } else { 4065 for (i=0;i<n_constraints;i++) { 4066 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4067 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4068 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4069 } 4070 } 4071 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4072 } 4073 } else { 4074 PetscScalar *marr; 4075 4076 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4077 for (i=0;i<n_constraints;i++) { 4078 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4079 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 4080 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4081 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4082 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4083 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4084 } 4085 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4086 } 4087 if (sparserhs) { 4088 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 4089 } 4090 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4091 if (!pcbddc->switch_static) { 4092 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4093 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 4094 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 4095 for (i=0;i<n_constraints;i++) { 4096 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 4097 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 4098 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4099 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4100 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4101 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4102 } 4103 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 4104 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 4105 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4106 } else { 4107 if (lda_rhs != n_R) { 4108 IS dummy; 4109 4110 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 4111 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4112 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 4113 } else { 4114 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 4115 pcbddc->local_auxmat2 = local_auxmat2_R; 4116 } 4117 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4118 } 4119 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4120 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 4121 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 4122 if (isCHOL) { 4123 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 4124 } else { 4125 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 4126 } 4127 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 4128 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4129 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4130 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4131 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4132 ierr = MatDestroy(&M3);CHKERRQ(ierr); 4133 } 4134 4135 /* Get submatrices from subdomain matrix */ 4136 if (n_vertices) { 4137 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4138 PetscBool oldpin; 4139 #endif 4140 PetscBool isaij; 4141 IS is_aux; 4142 4143 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4144 IS tis; 4145 4146 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 4147 ierr = ISSort(tis);CHKERRQ(ierr); 4148 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 4149 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4150 } else { 4151 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 4152 } 4153 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4154 oldpin = pcbddc->local_mat->boundtocpu; 4155 #endif 4156 ierr = MatBindToCPU(pcbddc->local_mat,PETSC_TRUE);CHKERRQ(ierr); 4157 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 4158 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 4159 ierr = PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij);CHKERRQ(ierr); 4160 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4161 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4162 } 4163 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 4164 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4165 ierr = MatBindToCPU(pcbddc->local_mat,oldpin);CHKERRQ(ierr); 4166 #endif 4167 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4168 } 4169 4170 /* Matrix of coarse basis functions (local) */ 4171 if (pcbddc->coarse_phi_B) { 4172 PetscInt on_B,on_primal,on_D=n_D; 4173 if (pcbddc->coarse_phi_D) { 4174 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 4175 } 4176 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 4177 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4178 PetscScalar *marray; 4179 4180 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 4181 ierr = PetscFree(marray);CHKERRQ(ierr); 4182 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4183 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4184 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4185 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4186 } 4187 } 4188 4189 if (!pcbddc->coarse_phi_B) { 4190 PetscScalar *marr; 4191 4192 /* memory size */ 4193 n = n_B*pcbddc->local_primal_size; 4194 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4195 if (!pcbddc->symmetric_primal) n *= 2; 4196 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 4197 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4198 marr += n_B*pcbddc->local_primal_size; 4199 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4200 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4201 marr += n_D*pcbddc->local_primal_size; 4202 } 4203 if (!pcbddc->symmetric_primal) { 4204 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4205 marr += n_B*pcbddc->local_primal_size; 4206 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4207 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4208 } 4209 } else { 4210 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 4211 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4212 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4213 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 4214 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4215 } 4216 } 4217 } 4218 4219 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4220 p0_lidx_I = NULL; 4221 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4222 const PetscInt *idxs; 4223 4224 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4225 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 4226 for (i=0;i<pcbddc->benign_n;i++) { 4227 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 4228 } 4229 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4230 } 4231 4232 /* vertices */ 4233 if (n_vertices) { 4234 PetscBool restoreavr = PETSC_FALSE; 4235 4236 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 4237 4238 if (n_R) { 4239 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4240 PetscBLASInt B_N,B_one = 1; 4241 const PetscScalar *x; 4242 PetscScalar *y; 4243 4244 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 4245 if (need_benign_correction) { 4246 ISLocalToGlobalMapping RtoN; 4247 IS is_p0; 4248 PetscInt *idxs_p0,n; 4249 4250 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 4251 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 4252 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 4253 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n); 4254 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 4255 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 4256 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 4257 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 4258 } 4259 4260 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4261 if (!sparserhs || need_benign_correction) { 4262 if (lda_rhs == n_R) { 4263 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4264 } else { 4265 PetscScalar *av,*array; 4266 const PetscInt *xadj,*adjncy; 4267 PetscInt n; 4268 PetscBool flg_row; 4269 4270 array = work+lda_rhs*n_vertices; 4271 ierr = PetscArrayzero(array,lda_rhs*n_vertices);CHKERRQ(ierr); 4272 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4273 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4274 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 4275 for (i=0;i<n;i++) { 4276 PetscInt j; 4277 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4278 } 4279 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4280 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4281 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 4282 } 4283 if (need_benign_correction) { 4284 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4285 PetscScalar *marr; 4286 4287 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 4288 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4289 4290 | 0 0 0 | (V) 4291 L = | 0 0 -1 | (P-p0) 4292 | 0 0 -1 | (p0) 4293 4294 */ 4295 for (i=0;i<reuse_solver->benign_n;i++) { 4296 const PetscScalar *vals; 4297 const PetscInt *idxs,*idxs_zero; 4298 PetscInt n,j,nz; 4299 4300 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4301 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4302 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4303 for (j=0;j<n;j++) { 4304 PetscScalar val = vals[j]; 4305 PetscInt k,col = idxs[j]; 4306 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4307 } 4308 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4309 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4310 } 4311 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 4312 } 4313 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 4314 Brhs = A_RV; 4315 } else { 4316 Mat tA_RVT,A_RVT; 4317 4318 if (!pcbddc->symmetric_primal) { 4319 /* A_RV already scaled by -1 */ 4320 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 4321 } else { 4322 restoreavr = PETSC_TRUE; 4323 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4324 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 4325 A_RVT = A_VR; 4326 } 4327 if (lda_rhs != n_R) { 4328 PetscScalar *aa; 4329 PetscInt r,*ii,*jj; 4330 PetscBool done; 4331 4332 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4333 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4334 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4335 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4336 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4337 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4338 } else { 4339 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4340 tA_RVT = A_RVT; 4341 } 4342 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4343 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4344 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4345 } 4346 if (F) { 4347 /* need to correct the rhs */ 4348 if (need_benign_correction) { 4349 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4350 PetscScalar *marr; 4351 4352 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4353 if (lda_rhs != n_R) { 4354 for (i=0;i<n_vertices;i++) { 4355 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4356 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4357 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4358 } 4359 } else { 4360 for (i=0;i<n_vertices;i++) { 4361 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4362 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4363 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4364 } 4365 } 4366 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4367 } 4368 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4369 if (restoreavr) { 4370 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4371 } 4372 /* need to correct the solution */ 4373 if (need_benign_correction) { 4374 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4375 PetscScalar *marr; 4376 4377 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4378 if (lda_rhs != n_R) { 4379 for (i=0;i<n_vertices;i++) { 4380 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4381 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4382 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4383 } 4384 } else { 4385 for (i=0;i<n_vertices;i++) { 4386 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4387 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4388 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4389 } 4390 } 4391 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4392 } 4393 } else { 4394 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4395 for (i=0;i<n_vertices;i++) { 4396 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4397 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4398 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4399 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4400 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4401 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4402 } 4403 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4404 } 4405 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4406 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4407 /* S_VV and S_CV */ 4408 if (n_constraints) { 4409 Mat B; 4410 4411 ierr = PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices);CHKERRQ(ierr); 4412 for (i=0;i<n_vertices;i++) { 4413 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4414 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4415 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4416 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4417 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4418 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4419 } 4420 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4421 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 4422 ierr = MatDestroy(&B);CHKERRQ(ierr); 4423 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4424 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4425 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4426 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4427 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4428 ierr = MatDestroy(&B);CHKERRQ(ierr); 4429 } 4430 if (lda_rhs != n_R) { 4431 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4432 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4433 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4434 } 4435 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4436 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4437 if (need_benign_correction) { 4438 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4439 PetscScalar *marr,*sums; 4440 4441 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4442 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4443 for (i=0;i<reuse_solver->benign_n;i++) { 4444 const PetscScalar *vals; 4445 const PetscInt *idxs,*idxs_zero; 4446 PetscInt n,j,nz; 4447 4448 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4449 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4450 for (j=0;j<n_vertices;j++) { 4451 PetscInt k; 4452 sums[j] = 0.; 4453 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4454 } 4455 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4456 for (j=0;j<n;j++) { 4457 PetscScalar val = vals[j]; 4458 PetscInt k; 4459 for (k=0;k<n_vertices;k++) { 4460 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4461 } 4462 } 4463 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4464 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4465 } 4466 ierr = PetscFree(sums);CHKERRQ(ierr); 4467 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4468 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4469 } 4470 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4471 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4472 ierr = MatDenseGetArrayRead(A_VV,&x);CHKERRQ(ierr); 4473 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4474 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4475 ierr = MatDenseRestoreArrayRead(A_VV,&x);CHKERRQ(ierr); 4476 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4477 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4478 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4479 } else { 4480 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4481 } 4482 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4483 4484 /* coarse basis functions */ 4485 for (i=0;i<n_vertices;i++) { 4486 PetscScalar *y; 4487 4488 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4489 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4490 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4491 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4492 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4493 y[n_B*i+idx_V_B[i]] = 1.0; 4494 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4495 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4496 4497 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4498 PetscInt j; 4499 4500 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4501 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4502 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4503 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4504 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4505 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4506 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4507 } 4508 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4509 } 4510 /* if n_R == 0 the object is not destroyed */ 4511 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4512 } 4513 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4514 4515 if (n_constraints) { 4516 Mat B; 4517 4518 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4519 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4520 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4521 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4522 if (n_vertices) { 4523 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4524 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4525 } else { 4526 Mat S_VCt; 4527 4528 if (lda_rhs != n_R) { 4529 ierr = MatDestroy(&B);CHKERRQ(ierr); 4530 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4531 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4532 } 4533 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4534 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4535 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4536 } 4537 } 4538 ierr = MatDestroy(&B);CHKERRQ(ierr); 4539 /* coarse basis functions */ 4540 for (i=0;i<n_constraints;i++) { 4541 PetscScalar *y; 4542 4543 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4544 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4545 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4546 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4547 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4548 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4549 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4550 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4551 PetscInt j; 4552 4553 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4554 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4555 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4556 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4557 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4558 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4559 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4560 } 4561 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4562 } 4563 } 4564 if (n_constraints) { 4565 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4566 } 4567 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4568 4569 /* coarse matrix entries relative to B_0 */ 4570 if (pcbddc->benign_n) { 4571 Mat B0_B,B0_BPHI; 4572 IS is_dummy; 4573 const PetscScalar *data; 4574 PetscInt j; 4575 4576 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4577 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4578 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4579 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4580 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4581 ierr = MatDenseGetArrayRead(B0_BPHI,&data);CHKERRQ(ierr); 4582 for (j=0;j<pcbddc->benign_n;j++) { 4583 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4584 for (i=0;i<pcbddc->local_primal_size;i++) { 4585 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4586 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4587 } 4588 } 4589 ierr = MatDenseRestoreArrayRead(B0_BPHI,&data);CHKERRQ(ierr); 4590 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4591 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4592 } 4593 4594 /* compute other basis functions for non-symmetric problems */ 4595 if (!pcbddc->symmetric_primal) { 4596 Mat B_V=NULL,B_C=NULL; 4597 PetscScalar *marray; 4598 4599 if (n_constraints) { 4600 Mat S_CCT,C_CRT; 4601 4602 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4603 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4604 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4605 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4606 if (n_vertices) { 4607 Mat S_VCT; 4608 4609 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4610 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4611 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4612 } 4613 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4614 } else { 4615 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4616 } 4617 if (n_vertices && n_R) { 4618 PetscScalar *av,*marray; 4619 const PetscInt *xadj,*adjncy; 4620 PetscInt n; 4621 PetscBool flg_row; 4622 4623 /* B_V = B_V - A_VR^T */ 4624 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4625 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4626 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4627 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4628 for (i=0;i<n;i++) { 4629 PetscInt j; 4630 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4631 } 4632 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4633 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4634 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4635 } 4636 4637 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4638 if (n_vertices) { 4639 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4640 for (i=0;i<n_vertices;i++) { 4641 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4642 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4643 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4644 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4645 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4646 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4647 } 4648 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4649 } 4650 if (B_C) { 4651 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4652 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4653 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4654 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4655 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4656 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4657 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4658 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4659 } 4660 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4661 } 4662 /* coarse basis functions */ 4663 for (i=0;i<pcbddc->local_primal_size;i++) { 4664 PetscScalar *y; 4665 4666 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4667 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4668 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4669 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4670 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4671 if (i<n_vertices) { 4672 y[n_B*i+idx_V_B[i]] = 1.0; 4673 } 4674 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4675 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4676 4677 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4678 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4679 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4680 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4681 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4682 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4683 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4684 } 4685 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4686 } 4687 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4688 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4689 } 4690 4691 /* free memory */ 4692 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4693 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4694 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4695 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4696 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4697 ierr = PetscFree(work);CHKERRQ(ierr); 4698 if (n_vertices) { 4699 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4700 } 4701 if (n_constraints) { 4702 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4703 } 4704 ierr = PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 4705 4706 /* Checking coarse_sub_mat and coarse basis functios */ 4707 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4708 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4709 if (pcbddc->dbg_flag) { 4710 Mat coarse_sub_mat; 4711 Mat AUXMAT,TM1,TM2,TM3,TM4; 4712 Mat coarse_phi_D,coarse_phi_B; 4713 Mat coarse_psi_D,coarse_psi_B; 4714 Mat A_II,A_BB,A_IB,A_BI; 4715 Mat C_B,CPHI; 4716 IS is_dummy; 4717 Vec mones; 4718 MatType checkmattype=MATSEQAIJ; 4719 PetscReal real_value; 4720 4721 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4722 Mat A; 4723 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4724 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4725 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4726 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4727 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4728 ierr = MatDestroy(&A);CHKERRQ(ierr); 4729 } else { 4730 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4731 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4732 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4733 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4734 } 4735 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4736 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4737 if (!pcbddc->symmetric_primal) { 4738 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4739 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4740 } 4741 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4742 4743 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4744 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4745 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4746 if (!pcbddc->symmetric_primal) { 4747 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4748 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4749 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4750 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4751 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4752 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4753 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4754 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4755 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4756 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4757 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4758 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4759 } else { 4760 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4761 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4762 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4763 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4764 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4765 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4766 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4767 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4768 } 4769 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4770 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4771 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4772 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4773 if (pcbddc->benign_n) { 4774 Mat B0_B,B0_BPHI; 4775 const PetscScalar *data2; 4776 PetscScalar *data; 4777 PetscInt j; 4778 4779 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4780 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4781 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4782 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4783 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4784 ierr = MatDenseGetArrayRead(B0_BPHI,&data2);CHKERRQ(ierr); 4785 for (j=0;j<pcbddc->benign_n;j++) { 4786 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4787 for (i=0;i<pcbddc->local_primal_size;i++) { 4788 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4789 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4790 } 4791 } 4792 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4793 ierr = MatDenseRestoreArrayRead(B0_BPHI,&data2);CHKERRQ(ierr); 4794 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4795 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4796 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4797 } 4798 #if 0 4799 { 4800 PetscViewer viewer; 4801 char filename[256]; 4802 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4803 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4804 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4805 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4806 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4807 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4808 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4809 if (pcbddc->coarse_phi_B) { 4810 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4811 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4812 } 4813 if (pcbddc->coarse_phi_D) { 4814 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4815 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4816 } 4817 if (pcbddc->coarse_psi_B) { 4818 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4819 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4820 } 4821 if (pcbddc->coarse_psi_D) { 4822 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4823 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4824 } 4825 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4826 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4827 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4828 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4829 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4830 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4831 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4832 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4833 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4834 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4835 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4836 } 4837 #endif 4838 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4839 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4840 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4841 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4842 4843 /* check constraints */ 4844 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4845 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4846 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4847 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4848 } else { 4849 PetscScalar *data; 4850 Mat tmat; 4851 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4852 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4853 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4854 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4855 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4856 } 4857 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4858 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4859 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4860 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4861 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4862 if (!pcbddc->symmetric_primal) { 4863 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4864 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4865 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4866 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4867 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4868 } 4869 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4870 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4871 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4872 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4873 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4874 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4875 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4876 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4877 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4878 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4879 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4880 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4881 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4882 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4883 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4884 if (!pcbddc->symmetric_primal) { 4885 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4886 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4887 } 4888 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4889 } 4890 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4891 { 4892 PetscBool gpu; 4893 4894 ierr = PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu);CHKERRQ(ierr); 4895 if (gpu) { 4896 if (pcbddc->local_auxmat1) { 4897 ierr = MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4898 } 4899 if (pcbddc->local_auxmat2) { 4900 ierr = MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4901 } 4902 if (pcbddc->coarse_phi_B) { 4903 ierr = MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4904 } 4905 if (pcbddc->coarse_phi_D) { 4906 ierr = MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4907 } 4908 if (pcbddc->coarse_psi_B) { 4909 ierr = MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4910 } 4911 if (pcbddc->coarse_psi_D) { 4912 ierr = MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4913 } 4914 } 4915 } 4916 /* get back data */ 4917 *coarse_submat_vals_n = coarse_submat_vals; 4918 PetscFunctionReturn(0); 4919 } 4920 4921 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4922 { 4923 Mat *work_mat; 4924 IS isrow_s,iscol_s; 4925 PetscBool rsorted,csorted; 4926 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4927 PetscErrorCode ierr; 4928 4929 PetscFunctionBegin; 4930 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4931 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4932 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4933 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4934 4935 if (!rsorted) { 4936 const PetscInt *idxs; 4937 PetscInt *idxs_sorted,i; 4938 4939 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4940 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4941 for (i=0;i<rsize;i++) { 4942 idxs_perm_r[i] = i; 4943 } 4944 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4945 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4946 for (i=0;i<rsize;i++) { 4947 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4948 } 4949 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4950 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4951 } else { 4952 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4953 isrow_s = isrow; 4954 } 4955 4956 if (!csorted) { 4957 if (isrow == iscol) { 4958 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4959 iscol_s = isrow_s; 4960 } else { 4961 const PetscInt *idxs; 4962 PetscInt *idxs_sorted,i; 4963 4964 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4965 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4966 for (i=0;i<csize;i++) { 4967 idxs_perm_c[i] = i; 4968 } 4969 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4970 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4971 for (i=0;i<csize;i++) { 4972 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4973 } 4974 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4975 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4976 } 4977 } else { 4978 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4979 iscol_s = iscol; 4980 } 4981 4982 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4983 4984 if (!rsorted || !csorted) { 4985 Mat new_mat; 4986 IS is_perm_r,is_perm_c; 4987 4988 if (!rsorted) { 4989 PetscInt *idxs_r,i; 4990 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4991 for (i=0;i<rsize;i++) { 4992 idxs_r[idxs_perm_r[i]] = i; 4993 } 4994 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4995 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4996 } else { 4997 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4998 } 4999 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 5000 5001 if (!csorted) { 5002 if (isrow_s == iscol_s) { 5003 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 5004 is_perm_c = is_perm_r; 5005 } else { 5006 PetscInt *idxs_c,i; 5007 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 5008 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 5009 for (i=0;i<csize;i++) { 5010 idxs_c[idxs_perm_c[i]] = i; 5011 } 5012 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 5013 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 5014 } 5015 } else { 5016 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 5017 } 5018 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 5019 5020 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 5021 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 5022 work_mat[0] = new_mat; 5023 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 5024 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 5025 } 5026 5027 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 5028 *B = work_mat[0]; 5029 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 5030 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 5031 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 5032 PetscFunctionReturn(0); 5033 } 5034 5035 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5036 { 5037 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5038 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5039 Mat new_mat,lA; 5040 IS is_local,is_global; 5041 PetscInt local_size; 5042 PetscBool isseqaij; 5043 PetscErrorCode ierr; 5044 5045 PetscFunctionBegin; 5046 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5047 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 5048 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 5049 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 5050 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 5051 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 5052 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5053 5054 if (pcbddc->dbg_flag) { 5055 Vec x,x_change; 5056 PetscReal error; 5057 5058 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 5059 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5060 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 5061 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5062 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5063 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 5064 if (!pcbddc->change_interior) { 5065 const PetscScalar *x,*y,*v; 5066 PetscReal lerror = 0.; 5067 PetscInt i; 5068 5069 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 5070 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 5071 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 5072 for (i=0;i<local_size;i++) 5073 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 5074 lerror = PetscAbsScalar(x[i]-y[i]); 5075 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 5076 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 5077 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 5078 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5079 if (error > PETSC_SMALL) { 5080 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5081 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error); 5082 } else { 5083 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error); 5084 } 5085 } 5086 } 5087 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5088 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5089 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5090 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5091 if (error > PETSC_SMALL) { 5092 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5093 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 5094 } else { 5095 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error); 5096 } 5097 } 5098 ierr = VecDestroy(&x);CHKERRQ(ierr); 5099 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5100 } 5101 5102 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5103 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 5104 5105 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5106 ierr = PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5107 if (isseqaij) { 5108 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5109 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 5110 if (lA) { 5111 Mat work; 5112 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 5113 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 5114 ierr = MatDestroy(&work);CHKERRQ(ierr); 5115 } 5116 } else { 5117 Mat work_mat; 5118 5119 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5120 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5121 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 5122 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 5123 if (lA) { 5124 Mat work; 5125 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5126 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 5127 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 5128 ierr = MatDestroy(&work);CHKERRQ(ierr); 5129 } 5130 } 5131 if (matis->A->symmetric_set) { 5132 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 5133 #if !defined(PETSC_USE_COMPLEX) 5134 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 5135 #endif 5136 } 5137 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 5138 PetscFunctionReturn(0); 5139 } 5140 5141 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5142 { 5143 PC_IS* pcis = (PC_IS*)(pc->data); 5144 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5145 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5146 PetscInt *idx_R_local=NULL; 5147 PetscInt n_vertices,i,j,n_R,n_D,n_B; 5148 PetscInt vbs,bs; 5149 PetscBT bitmask=NULL; 5150 PetscErrorCode ierr; 5151 5152 PetscFunctionBegin; 5153 /* 5154 No need to setup local scatters if 5155 - primal space is unchanged 5156 AND 5157 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5158 AND 5159 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5160 */ 5161 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5162 PetscFunctionReturn(0); 5163 } 5164 /* destroy old objects */ 5165 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 5166 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 5167 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 5168 /* Set Non-overlapping dimensions */ 5169 n_B = pcis->n_B; 5170 n_D = pcis->n - n_B; 5171 n_vertices = pcbddc->n_vertices; 5172 5173 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5174 5175 /* create auxiliary bitmask and allocate workspace */ 5176 if (!sub_schurs || !sub_schurs->reuse_solver) { 5177 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 5178 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 5179 for (i=0;i<n_vertices;i++) { 5180 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 5181 } 5182 5183 for (i=0, n_R=0; i<pcis->n; i++) { 5184 if (!PetscBTLookup(bitmask,i)) { 5185 idx_R_local[n_R++] = i; 5186 } 5187 } 5188 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5189 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5190 5191 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5192 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 5193 } 5194 5195 /* Block code */ 5196 vbs = 1; 5197 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 5198 if (bs>1 && !(n_vertices%bs)) { 5199 PetscBool is_blocked = PETSC_TRUE; 5200 PetscInt *vary; 5201 if (!sub_schurs || !sub_schurs->reuse_solver) { 5202 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 5203 ierr = PetscArrayzero(vary,pcis->n/bs);CHKERRQ(ierr); 5204 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5205 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5206 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5207 for (i=0; i<pcis->n/bs; i++) { 5208 if (vary[i]!=0 && vary[i]!=bs) { 5209 is_blocked = PETSC_FALSE; 5210 break; 5211 } 5212 } 5213 ierr = PetscFree(vary);CHKERRQ(ierr); 5214 } else { 5215 /* Verify directly the R set */ 5216 for (i=0; i<n_R/bs; i++) { 5217 PetscInt j,node=idx_R_local[bs*i]; 5218 for (j=1; j<bs; j++) { 5219 if (node != idx_R_local[bs*i+j]-j) { 5220 is_blocked = PETSC_FALSE; 5221 break; 5222 } 5223 } 5224 } 5225 } 5226 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5227 vbs = bs; 5228 for (i=0;i<n_R/vbs;i++) { 5229 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5230 } 5231 } 5232 } 5233 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 5234 if (sub_schurs && sub_schurs->reuse_solver) { 5235 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5236 5237 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5238 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 5239 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 5240 reuse_solver->is_R = pcbddc->is_R_local; 5241 } else { 5242 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 5243 } 5244 5245 /* print some info if requested */ 5246 if (pcbddc->dbg_flag) { 5247 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5248 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5249 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5250 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 5251 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 5252 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 5253 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5254 } 5255 5256 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5257 if (!sub_schurs || !sub_schurs->reuse_solver) { 5258 IS is_aux1,is_aux2; 5259 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5260 5261 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5262 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 5263 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 5264 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5265 for (i=0; i<n_D; i++) { 5266 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 5267 } 5268 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5269 for (i=0, j=0; i<n_R; i++) { 5270 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5271 aux_array1[j++] = i; 5272 } 5273 } 5274 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5275 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5276 for (i=0, j=0; i<n_B; i++) { 5277 if (!PetscBTLookup(bitmask,is_indices[i])) { 5278 aux_array2[j++] = i; 5279 } 5280 } 5281 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5282 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 5283 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 5284 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5285 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 5286 5287 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5288 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 5289 for (i=0, j=0; i<n_R; i++) { 5290 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5291 aux_array1[j++] = i; 5292 } 5293 } 5294 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5295 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5296 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5297 } 5298 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 5299 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5300 } else { 5301 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5302 IS tis; 5303 PetscInt schur_size; 5304 5305 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 5306 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 5307 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 5308 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5309 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5310 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 5311 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5312 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5313 } 5314 } 5315 PetscFunctionReturn(0); 5316 } 5317 5318 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5319 { 5320 MatNullSpace NullSpace; 5321 Mat dmat; 5322 const Vec *nullvecs; 5323 Vec v,v2,*nullvecs2; 5324 VecScatter sct = NULL; 5325 PetscContainer c; 5326 PetscScalar *ddata; 5327 PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs; 5328 PetscBool nnsp_has_cnst; 5329 PetscErrorCode ierr; 5330 5331 PetscFunctionBegin; 5332 if (!is && !B) { /* MATIS */ 5333 Mat_IS* matis = (Mat_IS*)A->data; 5334 5335 if (!B) { 5336 ierr = MatISGetLocalMat(A,&B);CHKERRQ(ierr); 5337 } 5338 sct = matis->cctx; 5339 ierr = PetscObjectReference((PetscObject)sct);CHKERRQ(ierr); 5340 } else { 5341 ierr = MatGetNullSpace(B,&NullSpace);CHKERRQ(ierr); 5342 if (!NullSpace) { 5343 ierr = MatGetNearNullSpace(B,&NullSpace);CHKERRQ(ierr); 5344 } 5345 if (NullSpace) PetscFunctionReturn(0); 5346 } 5347 ierr = MatGetNullSpace(A,&NullSpace);CHKERRQ(ierr); 5348 if (!NullSpace) { 5349 ierr = MatGetNearNullSpace(A,&NullSpace);CHKERRQ(ierr); 5350 } 5351 if (!NullSpace) PetscFunctionReturn(0); 5352 5353 ierr = MatCreateVecs(A,&v,NULL);CHKERRQ(ierr); 5354 ierr = MatCreateVecs(B,&v2,NULL);CHKERRQ(ierr); 5355 if (!sct) { 5356 ierr = VecScatterCreate(v,is,v2,NULL,&sct);CHKERRQ(ierr); 5357 } 5358 ierr = MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs);CHKERRQ(ierr); 5359 bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst; 5360 ierr = PetscMalloc1(bsiz,&nullvecs2);CHKERRQ(ierr); 5361 ierr = VecGetBlockSize(v2,&bs);CHKERRQ(ierr); 5362 ierr = VecGetSize(v2,&N);CHKERRQ(ierr); 5363 ierr = VecGetLocalSize(v2,&n);CHKERRQ(ierr); 5364 ierr = PetscMalloc1(n*bsiz,&ddata);CHKERRQ(ierr); 5365 for (k=0;k<nnsp_size;k++) { 5366 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k]);CHKERRQ(ierr); 5367 ierr = VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5368 ierr = VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5369 } 5370 if (nnsp_has_cnst) { 5371 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size]);CHKERRQ(ierr); 5372 ierr = VecSet(nullvecs2[nnsp_size],1.0);CHKERRQ(ierr); 5373 } 5374 ierr = PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2);CHKERRQ(ierr); 5375 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace);CHKERRQ(ierr); 5376 5377 ierr = MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat);CHKERRQ(ierr); 5378 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)B),&c);CHKERRQ(ierr); 5379 ierr = PetscContainerSetPointer(c,ddata);CHKERRQ(ierr); 5380 ierr = PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault);CHKERRQ(ierr); 5381 ierr = PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c);CHKERRQ(ierr); 5382 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 5383 ierr = PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat);CHKERRQ(ierr); 5384 ierr = MatDestroy(&dmat);CHKERRQ(ierr); 5385 5386 for (k=0;k<bsiz;k++) { 5387 ierr = VecDestroy(&nullvecs2[k]);CHKERRQ(ierr); 5388 } 5389 ierr = PetscFree(nullvecs2);CHKERRQ(ierr); 5390 ierr = MatSetNearNullSpace(B,NullSpace);CHKERRQ(ierr); 5391 ierr = MatNullSpaceDestroy(&NullSpace);CHKERRQ(ierr); 5392 ierr = VecDestroy(&v);CHKERRQ(ierr); 5393 ierr = VecDestroy(&v2);CHKERRQ(ierr); 5394 ierr = VecScatterDestroy(&sct);CHKERRQ(ierr); 5395 PetscFunctionReturn(0); 5396 } 5397 5398 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5399 { 5400 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5401 PC_IS *pcis = (PC_IS*)pc->data; 5402 PC pc_temp; 5403 Mat A_RR; 5404 MatNullSpace nnsp; 5405 MatReuse reuse; 5406 PetscScalar m_one = -1.0; 5407 PetscReal value; 5408 PetscInt n_D,n_R; 5409 PetscBool issbaij,opts; 5410 PetscErrorCode ierr; 5411 void (*f)(void) = 0; 5412 char dir_prefix[256],neu_prefix[256],str_level[16]; 5413 size_t len; 5414 5415 PetscFunctionBegin; 5416 ierr = PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5417 /* approximate solver, propagate NearNullSpace if needed */ 5418 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5419 MatNullSpace gnnsp1,gnnsp2; 5420 PetscBool lhas,ghas; 5421 5422 ierr = MatGetNearNullSpace(pcbddc->local_mat,&nnsp);CHKERRQ(ierr); 5423 ierr = MatGetNearNullSpace(pc->pmat,&gnnsp1);CHKERRQ(ierr); 5424 ierr = MatGetNullSpace(pc->pmat,&gnnsp2);CHKERRQ(ierr); 5425 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5426 ierr = MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5427 if (!ghas && (gnnsp1 || gnnsp2)) { 5428 ierr = MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL);CHKERRQ(ierr); 5429 } 5430 } 5431 5432 /* compute prefixes */ 5433 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 5434 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 5435 if (!pcbddc->current_level) { 5436 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));CHKERRQ(ierr); 5437 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));CHKERRQ(ierr); 5438 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5439 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5440 } else { 5441 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 5442 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5443 len -= 15; /* remove "pc_bddc_coarse_" */ 5444 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5445 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5446 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5447 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5448 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5449 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5450 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5451 ierr = PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));CHKERRQ(ierr); 5452 ierr = PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));CHKERRQ(ierr); 5453 } 5454 5455 /* DIRICHLET PROBLEM */ 5456 if (dirichlet) { 5457 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5458 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5459 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 5460 if (pcbddc->dbg_flag) { 5461 Mat A_IIn; 5462 5463 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5464 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5465 pcis->A_II = A_IIn; 5466 } 5467 } 5468 if (pcbddc->local_mat->symmetric_set) { 5469 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);CHKERRQ(ierr); 5470 } 5471 /* Matrix for Dirichlet problem is pcis->A_II */ 5472 n_D = pcis->n - pcis->n_B; 5473 opts = PETSC_FALSE; 5474 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5475 opts = PETSC_TRUE; 5476 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5477 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5478 /* default */ 5479 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5480 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5481 ierr = PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5482 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5483 if (issbaij) { 5484 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5485 } else { 5486 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5487 } 5488 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);CHKERRQ(ierr); 5489 } 5490 ierr = MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix);CHKERRQ(ierr); 5491 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II);CHKERRQ(ierr); 5492 /* Allow user's customization */ 5493 if (opts) { 5494 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5495 } 5496 ierr = MatGetNearNullSpace(pcis->pA_II,&nnsp);CHKERRQ(ierr); 5497 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5498 ierr = MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II);CHKERRQ(ierr); 5499 } 5500 ierr = MatGetNearNullSpace(pcis->pA_II,&nnsp);CHKERRQ(ierr); 5501 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5502 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5503 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5504 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5505 const PetscInt *idxs; 5506 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5507 5508 ierr = ISGetLocalSize(pcis->is_I_local,&nl);CHKERRQ(ierr); 5509 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5510 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5511 for (i=0;i<nl;i++) { 5512 for (d=0;d<cdim;d++) { 5513 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5514 } 5515 } 5516 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5517 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5518 ierr = PetscFree(scoords);CHKERRQ(ierr); 5519 } 5520 if (sub_schurs && sub_schurs->reuse_solver) { 5521 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5522 5523 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5524 } 5525 5526 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5527 if (!n_D) { 5528 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5529 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5530 } 5531 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 5532 /* set ksp_D into pcis data */ 5533 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5534 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5535 pcis->ksp_D = pcbddc->ksp_D; 5536 } 5537 5538 /* NEUMANN PROBLEM */ 5539 A_RR = 0; 5540 if (neumann) { 5541 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5542 PetscInt ibs,mbs; 5543 PetscBool issbaij, reuse_neumann_solver; 5544 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5545 5546 reuse_neumann_solver = PETSC_FALSE; 5547 if (sub_schurs && sub_schurs->reuse_solver) { 5548 IS iP; 5549 5550 reuse_neumann_solver = PETSC_TRUE; 5551 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5552 if (iP) reuse_neumann_solver = PETSC_FALSE; 5553 } 5554 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5555 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5556 if (pcbddc->ksp_R) { /* already created ksp */ 5557 PetscInt nn_R; 5558 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5559 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5560 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5561 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5562 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5563 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5564 reuse = MAT_INITIAL_MATRIX; 5565 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5566 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5567 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5568 reuse = MAT_INITIAL_MATRIX; 5569 } else { /* safe to reuse the matrix */ 5570 reuse = MAT_REUSE_MATRIX; 5571 } 5572 } 5573 /* last check */ 5574 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5575 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5576 reuse = MAT_INITIAL_MATRIX; 5577 } 5578 } else { /* first time, so we need to create the matrix */ 5579 reuse = MAT_INITIAL_MATRIX; 5580 } 5581 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5582 TODO: Get Rid of these conversions */ 5583 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5584 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5585 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5586 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5587 if (matis->A == pcbddc->local_mat) { 5588 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5589 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5590 } else { 5591 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5592 } 5593 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5594 if (matis->A == pcbddc->local_mat) { 5595 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5596 ierr = MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5597 } else { 5598 ierr = MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5599 } 5600 } 5601 /* extract A_RR */ 5602 if (reuse_neumann_solver) { 5603 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5604 5605 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5606 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5607 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5608 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5609 } else { 5610 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5611 } 5612 } else { 5613 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5614 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5615 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5616 } 5617 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5618 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5619 } 5620 if (pcbddc->local_mat->symmetric_set) { 5621 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);CHKERRQ(ierr); 5622 } 5623 opts = PETSC_FALSE; 5624 if (!pcbddc->ksp_R) { /* create object if not present */ 5625 opts = PETSC_TRUE; 5626 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5627 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5628 /* default */ 5629 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5630 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5631 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5632 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5633 if (issbaij) { 5634 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5635 } else { 5636 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5637 } 5638 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);CHKERRQ(ierr); 5639 } 5640 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5641 ierr = MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);CHKERRQ(ierr); 5642 if (opts) { /* Allow user's customization once */ 5643 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5644 } 5645 ierr = MatGetNearNullSpace(A_RR,&nnsp);CHKERRQ(ierr); 5646 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5647 ierr = MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR);CHKERRQ(ierr); 5648 } 5649 ierr = MatGetNearNullSpace(A_RR,&nnsp);CHKERRQ(ierr); 5650 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5651 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5652 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5653 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5654 const PetscInt *idxs; 5655 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5656 5657 ierr = ISGetLocalSize(pcbddc->is_R_local,&nl);CHKERRQ(ierr); 5658 ierr = ISGetIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5659 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5660 for (i=0;i<nl;i++) { 5661 for (d=0;d<cdim;d++) { 5662 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5663 } 5664 } 5665 ierr = ISRestoreIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5666 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5667 ierr = PetscFree(scoords);CHKERRQ(ierr); 5668 } 5669 5670 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5671 if (!n_R) { 5672 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5673 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5674 } 5675 /* Reuse solver if it is present */ 5676 if (reuse_neumann_solver) { 5677 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5678 5679 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5680 } 5681 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 5682 } 5683 5684 if (pcbddc->dbg_flag) { 5685 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5686 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5687 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5688 } 5689 ierr = PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5690 5691 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5692 if (pcbddc->NullSpace_corr[0]) { 5693 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5694 } 5695 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5696 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5697 } 5698 if (neumann && pcbddc->NullSpace_corr[2]) { 5699 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5700 } 5701 /* check Dirichlet and Neumann solvers */ 5702 if (pcbddc->dbg_flag) { 5703 if (dirichlet) { /* Dirichlet */ 5704 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5705 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5706 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5707 ierr = KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);CHKERRQ(ierr); 5708 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5709 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5710 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5711 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5712 } 5713 if (neumann) { /* Neumann */ 5714 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5715 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5716 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5717 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 5718 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5719 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5720 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5721 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5722 } 5723 } 5724 /* free Neumann problem's matrix */ 5725 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5726 PetscFunctionReturn(0); 5727 } 5728 5729 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5730 { 5731 PetscErrorCode ierr; 5732 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5733 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5734 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5735 5736 PetscFunctionBegin; 5737 if (!reuse_solver) { 5738 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5739 } 5740 if (!pcbddc->switch_static) { 5741 if (applytranspose && pcbddc->local_auxmat1) { 5742 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5743 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5744 } 5745 if (!reuse_solver) { 5746 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5747 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5748 } else { 5749 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5750 5751 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5752 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5753 } 5754 } else { 5755 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5756 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5757 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5758 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5759 if (applytranspose && pcbddc->local_auxmat1) { 5760 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5761 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5762 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5763 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5764 } 5765 } 5766 if (!reuse_solver || pcbddc->switch_static) { 5767 if (applytranspose) { 5768 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5769 } else { 5770 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5771 } 5772 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R);CHKERRQ(ierr); 5773 } else { 5774 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5775 5776 if (applytranspose) { 5777 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5778 } else { 5779 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5780 } 5781 } 5782 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5783 if (!pcbddc->switch_static) { 5784 if (!reuse_solver) { 5785 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5786 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5787 } else { 5788 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5789 5790 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5791 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5792 } 5793 if (!applytranspose && pcbddc->local_auxmat1) { 5794 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5795 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5796 } 5797 } else { 5798 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5799 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5800 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5801 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5802 if (!applytranspose && pcbddc->local_auxmat1) { 5803 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5804 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5805 } 5806 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5807 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5808 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5809 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5810 } 5811 PetscFunctionReturn(0); 5812 } 5813 5814 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5815 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5816 { 5817 PetscErrorCode ierr; 5818 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5819 PC_IS* pcis = (PC_IS*) (pc->data); 5820 const PetscScalar zero = 0.0; 5821 5822 PetscFunctionBegin; 5823 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5824 if (!pcbddc->benign_apply_coarse_only) { 5825 if (applytranspose) { 5826 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5827 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5828 } else { 5829 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5830 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5831 } 5832 } else { 5833 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5834 } 5835 5836 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5837 if (pcbddc->benign_n) { 5838 PetscScalar *array; 5839 PetscInt j; 5840 5841 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5842 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5843 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5844 } 5845 5846 /* start communications from local primal nodes to rhs of coarse solver */ 5847 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5848 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5849 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5850 5851 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5852 if (pcbddc->coarse_ksp) { 5853 Mat coarse_mat; 5854 Vec rhs,sol; 5855 MatNullSpace nullsp; 5856 PetscBool isbddc = PETSC_FALSE; 5857 5858 if (pcbddc->benign_have_null) { 5859 PC coarse_pc; 5860 5861 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5862 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5863 /* we need to propagate to coarser levels the need for a possible benign correction */ 5864 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5865 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5866 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5867 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5868 } 5869 } 5870 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5871 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5872 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5873 if (applytranspose) { 5874 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5875 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5876 ierr = KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);CHKERRQ(ierr); 5877 ierr = MatGetTransposeNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5878 if (nullsp) { 5879 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5880 } 5881 } else { 5882 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5883 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5884 PC coarse_pc; 5885 5886 if (nullsp) { 5887 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5888 } 5889 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5890 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5891 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5892 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5893 } else { 5894 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5895 ierr = KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);CHKERRQ(ierr); 5896 if (nullsp) { 5897 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5898 } 5899 } 5900 } 5901 /* we don't need the benign correction at coarser levels anymore */ 5902 if (pcbddc->benign_have_null && isbddc) { 5903 PC coarse_pc; 5904 PC_BDDC* coarsepcbddc; 5905 5906 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5907 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5908 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5909 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5910 } 5911 } 5912 5913 /* Local solution on R nodes */ 5914 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5915 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5916 } 5917 /* communications from coarse sol to local primal nodes */ 5918 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5919 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5920 5921 /* Sum contributions from the two levels */ 5922 if (!pcbddc->benign_apply_coarse_only) { 5923 if (applytranspose) { 5924 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5925 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5926 } else { 5927 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5928 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5929 } 5930 /* store p0 */ 5931 if (pcbddc->benign_n) { 5932 PetscScalar *array; 5933 PetscInt j; 5934 5935 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5936 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5937 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5938 } 5939 } else { /* expand the coarse solution */ 5940 if (applytranspose) { 5941 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5942 } else { 5943 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5944 } 5945 } 5946 PetscFunctionReturn(0); 5947 } 5948 5949 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5950 { 5951 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5952 Vec from,to; 5953 const PetscScalar *array; 5954 PetscErrorCode ierr; 5955 5956 PetscFunctionBegin; 5957 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5958 from = pcbddc->coarse_vec; 5959 to = pcbddc->vec1_P; 5960 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5961 Vec tvec; 5962 5963 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5964 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5965 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5966 ierr = VecGetArrayRead(tvec,&array);CHKERRQ(ierr); 5967 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5968 ierr = VecRestoreArrayRead(tvec,&array);CHKERRQ(ierr); 5969 } 5970 } else { /* from local to global -> put data in coarse right hand side */ 5971 from = pcbddc->vec1_P; 5972 to = pcbddc->coarse_vec; 5973 } 5974 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5975 PetscFunctionReturn(0); 5976 } 5977 5978 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5979 { 5980 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5981 Vec from,to; 5982 const PetscScalar *array; 5983 PetscErrorCode ierr; 5984 5985 PetscFunctionBegin; 5986 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5987 from = pcbddc->coarse_vec; 5988 to = pcbddc->vec1_P; 5989 } else { /* from local to global -> put data in coarse right hand side */ 5990 from = pcbddc->vec1_P; 5991 to = pcbddc->coarse_vec; 5992 } 5993 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5994 if (smode == SCATTER_FORWARD) { 5995 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5996 Vec tvec; 5997 5998 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5999 ierr = VecGetArrayRead(to,&array);CHKERRQ(ierr); 6000 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 6001 ierr = VecRestoreArrayRead(to,&array);CHKERRQ(ierr); 6002 } 6003 } else { 6004 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6005 ierr = VecResetArray(from);CHKERRQ(ierr); 6006 } 6007 } 6008 PetscFunctionReturn(0); 6009 } 6010 6011 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6012 { 6013 PetscErrorCode ierr; 6014 PC_IS* pcis = (PC_IS*)(pc->data); 6015 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6016 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6017 /* one and zero */ 6018 PetscScalar one=1.0,zero=0.0; 6019 /* space to store constraints and their local indices */ 6020 PetscScalar *constraints_data; 6021 PetscInt *constraints_idxs,*constraints_idxs_B; 6022 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 6023 PetscInt *constraints_n; 6024 /* iterators */ 6025 PetscInt i,j,k,total_counts,total_counts_cc,cum; 6026 /* BLAS integers */ 6027 PetscBLASInt lwork,lierr; 6028 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 6029 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 6030 /* reuse */ 6031 PetscInt olocal_primal_size,olocal_primal_size_cc; 6032 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 6033 /* change of basis */ 6034 PetscBool qr_needed; 6035 PetscBT change_basis,qr_needed_idx; 6036 /* auxiliary stuff */ 6037 PetscInt *nnz,*is_indices; 6038 PetscInt ncc; 6039 /* some quantities */ 6040 PetscInt n_vertices,total_primal_vertices,valid_constraints; 6041 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 6042 PetscReal tol; /* tolerance for retaining eigenmodes */ 6043 6044 PetscFunctionBegin; 6045 tol = PetscSqrtReal(PETSC_SMALL); 6046 /* Destroy Mat objects computed previously */ 6047 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6048 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6049 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 6050 /* save info on constraints from previous setup (if any) */ 6051 olocal_primal_size = pcbddc->local_primal_size; 6052 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6053 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 6054 ierr = PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc);CHKERRQ(ierr); 6055 ierr = PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc);CHKERRQ(ierr); 6056 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6057 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6058 6059 if (!pcbddc->adaptive_selection) { 6060 IS ISForVertices,*ISForFaces,*ISForEdges; 6061 MatNullSpace nearnullsp; 6062 const Vec *nearnullvecs; 6063 Vec *localnearnullsp; 6064 PetscScalar *array; 6065 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 6066 PetscBool nnsp_has_cnst; 6067 /* LAPACK working arrays for SVD or POD */ 6068 PetscBool skip_lapack,boolforchange; 6069 PetscScalar *work; 6070 PetscReal *singular_vals; 6071 #if defined(PETSC_USE_COMPLEX) 6072 PetscReal *rwork; 6073 #endif 6074 PetscScalar *temp_basis = NULL,*correlation_mat = NULL; 6075 PetscBLASInt dummy_int=1; 6076 PetscScalar dummy_scalar=1.; 6077 PetscBool use_pod = PETSC_FALSE; 6078 6079 /* MKL SVD with same input gives different results on different processes! */ 6080 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL) 6081 use_pod = PETSC_TRUE; 6082 #endif 6083 /* Get index sets for faces, edges and vertices from graph */ 6084 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 6085 /* print some info */ 6086 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6087 PetscInt nv; 6088 6089 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6090 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 6091 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6092 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6093 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6094 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 6095 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 6096 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6097 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6098 } 6099 6100 /* free unneeded index sets */ 6101 if (!pcbddc->use_vertices) { 6102 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6103 } 6104 if (!pcbddc->use_edges) { 6105 for (i=0;i<n_ISForEdges;i++) { 6106 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6107 } 6108 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6109 n_ISForEdges = 0; 6110 } 6111 if (!pcbddc->use_faces) { 6112 for (i=0;i<n_ISForFaces;i++) { 6113 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6114 } 6115 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6116 n_ISForFaces = 0; 6117 } 6118 6119 /* check if near null space is attached to global mat */ 6120 if (pcbddc->use_nnsp) { 6121 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 6122 } else nearnullsp = NULL; 6123 6124 if (nearnullsp) { 6125 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 6126 /* remove any stored info */ 6127 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 6128 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 6129 /* store information for BDDC solver reuse */ 6130 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 6131 pcbddc->onearnullspace = nearnullsp; 6132 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 6133 for (i=0;i<nnsp_size;i++) { 6134 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 6135 } 6136 } else { /* if near null space is not provided BDDC uses constants by default */ 6137 nnsp_size = 0; 6138 nnsp_has_cnst = PETSC_TRUE; 6139 } 6140 /* get max number of constraints on a single cc */ 6141 max_constraints = nnsp_size; 6142 if (nnsp_has_cnst) max_constraints++; 6143 6144 /* 6145 Evaluate maximum storage size needed by the procedure 6146 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6147 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6148 There can be multiple constraints per connected component 6149 */ 6150 n_vertices = 0; 6151 if (ISForVertices) { 6152 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 6153 } 6154 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 6155 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 6156 6157 total_counts = n_ISForFaces+n_ISForEdges; 6158 total_counts *= max_constraints; 6159 total_counts += n_vertices; 6160 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 6161 6162 total_counts = 0; 6163 max_size_of_constraint = 0; 6164 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 6165 IS used_is; 6166 if (i<n_ISForEdges) { 6167 used_is = ISForEdges[i]; 6168 } else { 6169 used_is = ISForFaces[i-n_ISForEdges]; 6170 } 6171 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 6172 total_counts += j; 6173 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 6174 } 6175 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 6176 6177 /* get local part of global near null space vectors */ 6178 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 6179 for (k=0;k<nnsp_size;k++) { 6180 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 6181 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6182 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6183 } 6184 6185 /* whether or not to skip lapack calls */ 6186 skip_lapack = PETSC_TRUE; 6187 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6188 6189 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6190 if (!skip_lapack) { 6191 PetscScalar temp_work; 6192 6193 if (use_pod) { 6194 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6195 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 6196 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 6197 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 6198 #if defined(PETSC_USE_COMPLEX) 6199 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 6200 #endif 6201 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6202 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6203 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 6204 lwork = -1; 6205 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6206 #if !defined(PETSC_USE_COMPLEX) 6207 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 6208 #else 6209 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 6210 #endif 6211 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6212 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 6213 } else { 6214 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6215 /* SVD */ 6216 PetscInt max_n,min_n; 6217 max_n = max_size_of_constraint; 6218 min_n = max_constraints; 6219 if (max_size_of_constraint < max_constraints) { 6220 min_n = max_size_of_constraint; 6221 max_n = max_constraints; 6222 } 6223 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 6224 #if defined(PETSC_USE_COMPLEX) 6225 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 6226 #endif 6227 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6228 lwork = -1; 6229 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 6230 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 6231 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 6232 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6233 #if !defined(PETSC_USE_COMPLEX) 6234 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 6235 #else 6236 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 6237 #endif 6238 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6239 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 6240 #else 6241 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6242 #endif /* on missing GESVD */ 6243 } 6244 /* Allocate optimal workspace */ 6245 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 6246 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 6247 } 6248 /* Now we can loop on constraining sets */ 6249 total_counts = 0; 6250 constraints_idxs_ptr[0] = 0; 6251 constraints_data_ptr[0] = 0; 6252 /* vertices */ 6253 if (n_vertices) { 6254 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6255 ierr = PetscArraycpy(constraints_idxs,is_indices,n_vertices);CHKERRQ(ierr); 6256 for (i=0;i<n_vertices;i++) { 6257 constraints_n[total_counts] = 1; 6258 constraints_data[total_counts] = 1.0; 6259 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 6260 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 6261 total_counts++; 6262 } 6263 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6264 n_vertices = total_counts; 6265 } 6266 6267 /* edges and faces */ 6268 total_counts_cc = total_counts; 6269 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6270 IS used_is; 6271 PetscBool idxs_copied = PETSC_FALSE; 6272 6273 if (ncc<n_ISForEdges) { 6274 used_is = ISForEdges[ncc]; 6275 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6276 } else { 6277 used_is = ISForFaces[ncc-n_ISForEdges]; 6278 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6279 } 6280 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6281 6282 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 6283 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6284 /* change of basis should not be performed on local periodic nodes */ 6285 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6286 if (nnsp_has_cnst) { 6287 PetscScalar quad_value; 6288 6289 ierr = PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);CHKERRQ(ierr); 6290 idxs_copied = PETSC_TRUE; 6291 6292 if (!pcbddc->use_nnsp_true) { 6293 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6294 } else { 6295 quad_value = 1.0; 6296 } 6297 for (j=0;j<size_of_constraint;j++) { 6298 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6299 } 6300 temp_constraints++; 6301 total_counts++; 6302 } 6303 for (k=0;k<nnsp_size;k++) { 6304 PetscReal real_value; 6305 PetscScalar *ptr_to_data; 6306 6307 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6308 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6309 for (j=0;j<size_of_constraint;j++) { 6310 ptr_to_data[j] = array[is_indices[j]]; 6311 } 6312 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6313 /* check if array is null on the connected component */ 6314 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6315 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6316 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6317 temp_constraints++; 6318 total_counts++; 6319 if (!idxs_copied) { 6320 ierr = PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);CHKERRQ(ierr); 6321 idxs_copied = PETSC_TRUE; 6322 } 6323 } 6324 } 6325 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6326 valid_constraints = temp_constraints; 6327 if (!pcbddc->use_nnsp_true && temp_constraints) { 6328 if (temp_constraints == 1) { /* just normalize the constraint */ 6329 PetscScalar norm,*ptr_to_data; 6330 6331 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6332 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6333 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6334 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6335 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6336 } else { /* perform SVD */ 6337 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6338 6339 if (use_pod) { 6340 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6341 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6342 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6343 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6344 from that computed using LAPACKgesvd 6345 -> This is due to a different computation of eigenvectors in LAPACKheev 6346 -> The quality of the POD-computed basis will be the same */ 6347 ierr = PetscArrayzero(correlation_mat,temp_constraints*temp_constraints);CHKERRQ(ierr); 6348 /* Store upper triangular part of correlation matrix */ 6349 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6350 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6351 for (j=0;j<temp_constraints;j++) { 6352 for (k=0;k<j+1;k++) { 6353 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6354 } 6355 } 6356 /* compute eigenvalues and eigenvectors of correlation matrix */ 6357 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6358 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 6359 #if !defined(PETSC_USE_COMPLEX) 6360 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6361 #else 6362 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6363 #endif 6364 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6365 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6366 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6367 j = 0; 6368 while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++; 6369 total_counts = total_counts-j; 6370 valid_constraints = temp_constraints-j; 6371 /* scale and copy POD basis into used quadrature memory */ 6372 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6373 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6374 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 6375 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6376 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 6377 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6378 if (j<temp_constraints) { 6379 PetscInt ii; 6380 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6381 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6382 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6383 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6384 for (k=0;k<temp_constraints-j;k++) { 6385 for (ii=0;ii<size_of_constraint;ii++) { 6386 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6387 } 6388 } 6389 } 6390 } else { 6391 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6392 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6393 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6394 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6395 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6396 #if !defined(PETSC_USE_COMPLEX) 6397 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6398 #else 6399 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6400 #endif 6401 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6402 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6403 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6404 k = temp_constraints; 6405 if (k > size_of_constraint) k = size_of_constraint; 6406 j = 0; 6407 while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++; 6408 valid_constraints = k-j; 6409 total_counts = total_counts-temp_constraints+valid_constraints; 6410 #else 6411 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6412 #endif /* on missing GESVD */ 6413 } 6414 } 6415 } 6416 /* update pointers information */ 6417 if (valid_constraints) { 6418 constraints_n[total_counts_cc] = valid_constraints; 6419 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6420 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6421 /* set change_of_basis flag */ 6422 if (boolforchange) { 6423 PetscBTSet(change_basis,total_counts_cc); 6424 } 6425 total_counts_cc++; 6426 } 6427 } 6428 /* free workspace */ 6429 if (!skip_lapack) { 6430 ierr = PetscFree(work);CHKERRQ(ierr); 6431 #if defined(PETSC_USE_COMPLEX) 6432 ierr = PetscFree(rwork);CHKERRQ(ierr); 6433 #endif 6434 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 6435 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 6436 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 6437 } 6438 for (k=0;k<nnsp_size;k++) { 6439 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 6440 } 6441 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 6442 /* free index sets of faces, edges and vertices */ 6443 for (i=0;i<n_ISForFaces;i++) { 6444 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6445 } 6446 if (n_ISForFaces) { 6447 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6448 } 6449 for (i=0;i<n_ISForEdges;i++) { 6450 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6451 } 6452 if (n_ISForEdges) { 6453 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6454 } 6455 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6456 } else { 6457 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6458 6459 total_counts = 0; 6460 n_vertices = 0; 6461 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6462 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6463 } 6464 max_constraints = 0; 6465 total_counts_cc = 0; 6466 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6467 total_counts += pcbddc->adaptive_constraints_n[i]; 6468 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6469 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6470 } 6471 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6472 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6473 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6474 constraints_data = pcbddc->adaptive_constraints_data; 6475 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6476 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 6477 total_counts_cc = 0; 6478 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6479 if (pcbddc->adaptive_constraints_n[i]) { 6480 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6481 } 6482 } 6483 6484 max_size_of_constraint = 0; 6485 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6486 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 6487 /* Change of basis */ 6488 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 6489 if (pcbddc->use_change_of_basis) { 6490 for (i=0;i<sub_schurs->n_subs;i++) { 6491 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6492 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 6493 } 6494 } 6495 } 6496 } 6497 pcbddc->local_primal_size = total_counts; 6498 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6499 6500 /* map constraints_idxs in boundary numbering */ 6501 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6502 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i); 6503 6504 /* Create constraint matrix */ 6505 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6506 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6507 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6508 6509 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6510 /* determine if a QR strategy is needed for change of basis */ 6511 qr_needed = pcbddc->use_qr_single; 6512 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6513 total_primal_vertices=0; 6514 pcbddc->local_primal_size_cc = 0; 6515 for (i=0;i<total_counts_cc;i++) { 6516 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6517 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6518 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6519 pcbddc->local_primal_size_cc += 1; 6520 } else if (PetscBTLookup(change_basis,i)) { 6521 for (k=0;k<constraints_n[i];k++) { 6522 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6523 } 6524 pcbddc->local_primal_size_cc += constraints_n[i]; 6525 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6526 PetscBTSet(qr_needed_idx,i); 6527 qr_needed = PETSC_TRUE; 6528 } 6529 } else { 6530 pcbddc->local_primal_size_cc += 1; 6531 } 6532 } 6533 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6534 pcbddc->n_vertices = total_primal_vertices; 6535 /* permute indices in order to have a sorted set of vertices */ 6536 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6537 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6538 ierr = PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices);CHKERRQ(ierr); 6539 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6540 6541 /* nonzero structure of constraint matrix */ 6542 /* and get reference dof for local constraints */ 6543 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6544 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6545 6546 j = total_primal_vertices; 6547 total_counts = total_primal_vertices; 6548 cum = total_primal_vertices; 6549 for (i=n_vertices;i<total_counts_cc;i++) { 6550 if (!PetscBTLookup(change_basis,i)) { 6551 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6552 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6553 cum++; 6554 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6555 for (k=0;k<constraints_n[i];k++) { 6556 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6557 nnz[j+k] = size_of_constraint; 6558 } 6559 j += constraints_n[i]; 6560 } 6561 } 6562 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6563 ierr = MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6564 ierr = PetscFree(nnz);CHKERRQ(ierr); 6565 6566 /* set values in constraint matrix */ 6567 for (i=0;i<total_primal_vertices;i++) { 6568 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6569 } 6570 total_counts = total_primal_vertices; 6571 for (i=n_vertices;i<total_counts_cc;i++) { 6572 if (!PetscBTLookup(change_basis,i)) { 6573 PetscInt *cols; 6574 6575 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6576 cols = constraints_idxs+constraints_idxs_ptr[i]; 6577 for (k=0;k<constraints_n[i];k++) { 6578 PetscInt row = total_counts+k; 6579 PetscScalar *vals; 6580 6581 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6582 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6583 } 6584 total_counts += constraints_n[i]; 6585 } 6586 } 6587 /* assembling */ 6588 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6589 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6590 ierr = MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view");CHKERRQ(ierr); 6591 6592 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6593 if (pcbddc->use_change_of_basis) { 6594 /* dual and primal dofs on a single cc */ 6595 PetscInt dual_dofs,primal_dofs; 6596 /* working stuff for GEQRF */ 6597 PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t; 6598 PetscBLASInt lqr_work; 6599 /* working stuff for UNGQR */ 6600 PetscScalar *gqr_work = NULL,lgqr_work_t=0.0; 6601 PetscBLASInt lgqr_work; 6602 /* working stuff for TRTRS */ 6603 PetscScalar *trs_rhs = NULL; 6604 PetscBLASInt Blas_NRHS; 6605 /* pointers for values insertion into change of basis matrix */ 6606 PetscInt *start_rows,*start_cols; 6607 PetscScalar *start_vals; 6608 /* working stuff for values insertion */ 6609 PetscBT is_primal; 6610 PetscInt *aux_primal_numbering_B; 6611 /* matrix sizes */ 6612 PetscInt global_size,local_size; 6613 /* temporary change of basis */ 6614 Mat localChangeOfBasisMatrix; 6615 /* extra space for debugging */ 6616 PetscScalar *dbg_work = NULL; 6617 6618 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6619 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6620 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6621 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6622 /* nonzeros for local mat */ 6623 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6624 if (!pcbddc->benign_change || pcbddc->fake_change) { 6625 for (i=0;i<pcis->n;i++) nnz[i]=1; 6626 } else { 6627 const PetscInt *ii; 6628 PetscInt n; 6629 PetscBool flg_row; 6630 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6631 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6632 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6633 } 6634 for (i=n_vertices;i<total_counts_cc;i++) { 6635 if (PetscBTLookup(change_basis,i)) { 6636 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6637 if (PetscBTLookup(qr_needed_idx,i)) { 6638 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6639 } else { 6640 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6641 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6642 } 6643 } 6644 } 6645 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6646 ierr = MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6647 ierr = PetscFree(nnz);CHKERRQ(ierr); 6648 /* Set interior change in the matrix */ 6649 if (!pcbddc->benign_change || pcbddc->fake_change) { 6650 for (i=0;i<pcis->n;i++) { 6651 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6652 } 6653 } else { 6654 const PetscInt *ii,*jj; 6655 PetscScalar *aa; 6656 PetscInt n; 6657 PetscBool flg_row; 6658 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6659 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6660 for (i=0;i<n;i++) { 6661 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6662 } 6663 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6664 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6665 } 6666 6667 if (pcbddc->dbg_flag) { 6668 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6669 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6670 } 6671 6672 6673 /* Now we loop on the constraints which need a change of basis */ 6674 /* 6675 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6676 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6677 6678 Basic blocks of change of basis matrix T computed by 6679 6680 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6681 6682 | 1 0 ... 0 s_1/S | 6683 | 0 1 ... 0 s_2/S | 6684 | ... | 6685 | 0 ... 1 s_{n-1}/S | 6686 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6687 6688 with S = \sum_{i=1}^n s_i^2 6689 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6690 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6691 6692 - QR decomposition of constraints otherwise 6693 */ 6694 if (qr_needed && max_size_of_constraint) { 6695 /* space to store Q */ 6696 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6697 /* array to store scaling factors for reflectors */ 6698 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6699 /* first we issue queries for optimal work */ 6700 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6701 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6702 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6703 lqr_work = -1; 6704 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6705 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6706 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6707 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6708 lgqr_work = -1; 6709 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6710 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6711 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6712 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6713 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6714 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6715 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6716 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6717 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6718 /* array to store rhs and solution of triangular solver */ 6719 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6720 /* allocating workspace for check */ 6721 if (pcbddc->dbg_flag) { 6722 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6723 } 6724 } 6725 /* array to store whether a node is primal or not */ 6726 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6727 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6728 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6729 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i); 6730 for (i=0;i<total_primal_vertices;i++) { 6731 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6732 } 6733 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6734 6735 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6736 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6737 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6738 if (PetscBTLookup(change_basis,total_counts)) { 6739 /* get constraint info */ 6740 primal_dofs = constraints_n[total_counts]; 6741 dual_dofs = size_of_constraint-primal_dofs; 6742 6743 if (pcbddc->dbg_flag) { 6744 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6745 } 6746 6747 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6748 6749 /* copy quadrature constraints for change of basis check */ 6750 if (pcbddc->dbg_flag) { 6751 ierr = PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6752 } 6753 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6754 ierr = PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6755 6756 /* compute QR decomposition of constraints */ 6757 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6758 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6759 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6760 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6761 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6762 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6763 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6764 6765 /* explictly compute R^-T */ 6766 ierr = PetscArrayzero(trs_rhs,primal_dofs*primal_dofs);CHKERRQ(ierr); 6767 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6768 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6769 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6770 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6771 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6772 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6773 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6774 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6775 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6776 6777 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6778 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6779 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6780 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6781 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6782 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6783 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6784 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6785 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6786 6787 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6788 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6789 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6790 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6791 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6792 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6793 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6794 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6795 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6796 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6797 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6798 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6799 ierr = PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6800 6801 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6802 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6803 /* insert cols for primal dofs */ 6804 for (j=0;j<primal_dofs;j++) { 6805 start_vals = &qr_basis[j*size_of_constraint]; 6806 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6807 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6808 } 6809 /* insert cols for dual dofs */ 6810 for (j=0,k=0;j<dual_dofs;k++) { 6811 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6812 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6813 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6814 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6815 j++; 6816 } 6817 } 6818 6819 /* check change of basis */ 6820 if (pcbddc->dbg_flag) { 6821 PetscInt ii,jj; 6822 PetscBool valid_qr=PETSC_TRUE; 6823 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6824 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6825 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6826 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6827 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6828 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6829 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6830 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6831 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6832 for (jj=0;jj<size_of_constraint;jj++) { 6833 for (ii=0;ii<primal_dofs;ii++) { 6834 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6835 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6836 } 6837 } 6838 if (!valid_qr) { 6839 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6840 for (jj=0;jj<size_of_constraint;jj++) { 6841 for (ii=0;ii<primal_dofs;ii++) { 6842 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6843 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6844 } 6845 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6846 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6847 } 6848 } 6849 } 6850 } else { 6851 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6852 } 6853 } 6854 } else { /* simple transformation block */ 6855 PetscInt row,col; 6856 PetscScalar val,norm; 6857 6858 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6859 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6860 for (j=0;j<size_of_constraint;j++) { 6861 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6862 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6863 if (!PetscBTLookup(is_primal,row_B)) { 6864 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6865 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6866 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6867 } else { 6868 for (k=0;k<size_of_constraint;k++) { 6869 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6870 if (row != col) { 6871 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6872 } else { 6873 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6874 } 6875 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6876 } 6877 } 6878 } 6879 if (pcbddc->dbg_flag) { 6880 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6881 } 6882 } 6883 } else { 6884 if (pcbddc->dbg_flag) { 6885 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6886 } 6887 } 6888 } 6889 6890 /* free workspace */ 6891 if (qr_needed) { 6892 if (pcbddc->dbg_flag) { 6893 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6894 } 6895 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6896 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6897 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6898 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6899 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6900 } 6901 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6902 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6903 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6904 6905 /* assembling of global change of variable */ 6906 if (!pcbddc->fake_change) { 6907 Mat tmat; 6908 PetscInt bs; 6909 6910 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6911 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6912 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6913 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6914 ierr = MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6915 ierr = MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6916 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6917 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6918 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6919 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6920 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6921 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6922 ierr = MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6923 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6924 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6925 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6926 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6927 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6928 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6929 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6930 6931 /* check */ 6932 if (pcbddc->dbg_flag) { 6933 PetscReal error; 6934 Vec x,x_change; 6935 6936 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6937 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6938 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6939 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6940 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6941 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6942 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6943 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6944 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6945 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6946 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6947 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6948 if (error > PETSC_SMALL) { 6949 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 6950 } 6951 ierr = VecDestroy(&x);CHKERRQ(ierr); 6952 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6953 } 6954 /* adapt sub_schurs computed (if any) */ 6955 if (pcbddc->use_deluxe_scaling) { 6956 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6957 6958 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6959 if (sub_schurs && sub_schurs->S_Ej_all) { 6960 Mat S_new,tmat; 6961 IS is_all_N,is_V_Sall = NULL; 6962 6963 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6964 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6965 if (pcbddc->deluxe_zerorows) { 6966 ISLocalToGlobalMapping NtoSall; 6967 IS is_V; 6968 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6969 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6970 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6971 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6972 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6973 } 6974 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6975 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6976 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6977 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6978 if (pcbddc->deluxe_zerorows) { 6979 const PetscScalar *array; 6980 const PetscInt *idxs_V,*idxs_all; 6981 PetscInt i,n_V; 6982 6983 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6984 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6985 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6986 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6987 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6988 for (i=0;i<n_V;i++) { 6989 PetscScalar val; 6990 PetscInt idx; 6991 6992 idx = idxs_V[i]; 6993 val = array[idxs_all[idxs_V[i]]]; 6994 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6995 } 6996 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6997 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6998 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6999 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 7000 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 7001 } 7002 sub_schurs->S_Ej_all = S_new; 7003 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 7004 if (sub_schurs->sum_S_Ej_all) { 7005 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 7006 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 7007 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 7008 if (pcbddc->deluxe_zerorows) { 7009 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 7010 } 7011 sub_schurs->sum_S_Ej_all = S_new; 7012 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 7013 } 7014 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 7015 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 7016 } 7017 /* destroy any change of basis context in sub_schurs */ 7018 if (sub_schurs && sub_schurs->change) { 7019 PetscInt i; 7020 7021 for (i=0;i<sub_schurs->n_subs;i++) { 7022 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 7023 } 7024 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 7025 } 7026 } 7027 if (pcbddc->switch_static) { /* need to save the local change */ 7028 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7029 } else { 7030 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 7031 } 7032 /* determine if any process has changed the pressures locally */ 7033 pcbddc->change_interior = pcbddc->benign_have_null; 7034 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7035 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 7036 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7037 pcbddc->use_qr_single = qr_needed; 7038 } 7039 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7040 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7041 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 7042 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7043 } else { 7044 Mat benign_global = NULL; 7045 if (pcbddc->benign_have_null) { 7046 Mat M; 7047 7048 pcbddc->change_interior = PETSC_TRUE; 7049 ierr = VecCopy(matis->counter,pcis->vec1_N);CHKERRQ(ierr); 7050 ierr = VecReciprocal(pcis->vec1_N);CHKERRQ(ierr); 7051 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);CHKERRQ(ierr); 7052 if (pcbddc->benign_change) { 7053 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 7054 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 7055 } else { 7056 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);CHKERRQ(ierr); 7057 ierr = MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);CHKERRQ(ierr); 7058 } 7059 ierr = MatISSetLocalMat(benign_global,M);CHKERRQ(ierr); 7060 ierr = MatDestroy(&M);CHKERRQ(ierr); 7061 ierr = MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7062 ierr = MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7063 } 7064 if (pcbddc->user_ChangeOfBasisMatrix) { 7065 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 7066 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 7067 } else if (pcbddc->benign_have_null) { 7068 pcbddc->ChangeOfBasisMatrix = benign_global; 7069 } 7070 } 7071 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7072 IS is_global; 7073 const PetscInt *gidxs; 7074 7075 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 7076 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 7077 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 7078 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 7079 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 7080 } 7081 } 7082 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 7083 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 7084 } 7085 7086 if (!pcbddc->fake_change) { 7087 /* add pressure dofs to set of primal nodes for numbering purposes */ 7088 for (i=0;i<pcbddc->benign_n;i++) { 7089 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7090 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7091 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7092 pcbddc->local_primal_size_cc++; 7093 pcbddc->local_primal_size++; 7094 } 7095 7096 /* check if a new primal space has been introduced (also take into account benign trick) */ 7097 pcbddc->new_primal_space_local = PETSC_TRUE; 7098 if (olocal_primal_size == pcbddc->local_primal_size) { 7099 ierr = PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 7100 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7101 if (!pcbddc->new_primal_space_local) { 7102 ierr = PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 7103 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7104 } 7105 } 7106 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7107 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7108 } 7109 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 7110 7111 /* flush dbg viewer */ 7112 if (pcbddc->dbg_flag) { 7113 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7114 } 7115 7116 /* free workspace */ 7117 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 7118 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 7119 if (!pcbddc->adaptive_selection) { 7120 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 7121 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 7122 } else { 7123 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 7124 pcbddc->adaptive_constraints_idxs_ptr, 7125 pcbddc->adaptive_constraints_data_ptr, 7126 pcbddc->adaptive_constraints_idxs, 7127 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 7128 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 7129 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 7130 } 7131 PetscFunctionReturn(0); 7132 } 7133 7134 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7135 { 7136 ISLocalToGlobalMapping map; 7137 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7138 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 7139 PetscInt i,N; 7140 PetscBool rcsr = PETSC_FALSE; 7141 PetscErrorCode ierr; 7142 7143 PetscFunctionBegin; 7144 if (pcbddc->recompute_topography) { 7145 pcbddc->graphanalyzed = PETSC_FALSE; 7146 /* Reset previously computed graph */ 7147 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 7148 /* Init local Graph struct */ 7149 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 7150 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 7151 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 7152 7153 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 7154 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 7155 } 7156 /* Check validity of the csr graph passed in by the user */ 7157 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 7158 7159 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7160 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7161 PetscInt *xadj,*adjncy; 7162 PetscInt nvtxs; 7163 PetscBool flg_row=PETSC_FALSE; 7164 7165 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 7166 if (flg_row) { 7167 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 7168 pcbddc->computed_rowadj = PETSC_TRUE; 7169 } 7170 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 7171 rcsr = PETSC_TRUE; 7172 } 7173 if (pcbddc->dbg_flag) { 7174 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7175 } 7176 7177 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7178 PetscReal *lcoords; 7179 PetscInt n; 7180 MPI_Datatype dimrealtype; 7181 7182 /* TODO: support for blocked */ 7183 if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 7184 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 7185 ierr = PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);CHKERRQ(ierr); 7186 ierr = MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);CHKERRQ(ierr); 7187 ierr = MPI_Type_commit(&dimrealtype);CHKERRQ(ierr); 7188 ierr = PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 7189 ierr = PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 7190 ierr = MPI_Type_free(&dimrealtype);CHKERRQ(ierr); 7191 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 7192 7193 pcbddc->mat_graph->coords = lcoords; 7194 pcbddc->mat_graph->cloc = PETSC_TRUE; 7195 pcbddc->mat_graph->cnloc = n; 7196 } 7197 if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 7198 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected); 7199 7200 /* Setup of Graph */ 7201 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 7202 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 7203 7204 /* attach info on disconnected subdomains if present */ 7205 if (pcbddc->n_local_subs) { 7206 PetscInt *local_subs,n,totn; 7207 7208 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 7209 ierr = PetscMalloc1(n,&local_subs);CHKERRQ(ierr); 7210 for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs; 7211 for (i=0;i<pcbddc->n_local_subs;i++) { 7212 const PetscInt *idxs; 7213 PetscInt nl,j; 7214 7215 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 7216 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 7217 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 7218 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 7219 } 7220 for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]); 7221 pcbddc->mat_graph->n_local_subs = totn + 1; 7222 pcbddc->mat_graph->local_subs = local_subs; 7223 } 7224 } 7225 7226 if (!pcbddc->graphanalyzed) { 7227 /* Graph's connected components analysis */ 7228 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 7229 pcbddc->graphanalyzed = PETSC_TRUE; 7230 pcbddc->corner_selected = pcbddc->corner_selection; 7231 } 7232 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7233 PetscFunctionReturn(0); 7234 } 7235 7236 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7237 { 7238 PetscInt i,j,n; 7239 PetscScalar *alphas; 7240 PetscReal norm,*onorms; 7241 PetscErrorCode ierr; 7242 7243 PetscFunctionBegin; 7244 n = *nio; 7245 if (!n) PetscFunctionReturn(0); 7246 ierr = PetscMalloc2(n,&alphas,n,&onorms);CHKERRQ(ierr); 7247 ierr = VecNormalize(vecs[0],&norm);CHKERRQ(ierr); 7248 if (norm < PETSC_SMALL) { 7249 onorms[0] = 0.0; 7250 ierr = VecSet(vecs[0],0.0);CHKERRQ(ierr); 7251 } else { 7252 onorms[0] = norm; 7253 } 7254 7255 for (i=1;i<n;i++) { 7256 ierr = VecMDot(vecs[i],i,vecs,alphas);CHKERRQ(ierr); 7257 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 7258 ierr = VecMAXPY(vecs[i],i,alphas,vecs);CHKERRQ(ierr); 7259 ierr = VecNormalize(vecs[i],&norm);CHKERRQ(ierr); 7260 if (norm < PETSC_SMALL) { 7261 onorms[i] = 0.0; 7262 ierr = VecSet(vecs[i],0.0);CHKERRQ(ierr); 7263 } else { 7264 onorms[i] = norm; 7265 } 7266 } 7267 /* push nonzero vectors at the beginning */ 7268 for (i=0;i<n;i++) { 7269 if (onorms[i] == 0.0) { 7270 for (j=i+1;j<n;j++) { 7271 if (onorms[j] != 0.0) { 7272 ierr = VecCopy(vecs[j],vecs[i]);CHKERRQ(ierr); 7273 onorms[j] = 0.0; 7274 } 7275 } 7276 } 7277 } 7278 for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7279 ierr = PetscFree2(alphas,onorms);CHKERRQ(ierr); 7280 PetscFunctionReturn(0); 7281 } 7282 7283 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7284 { 7285 Mat A; 7286 PetscInt n_neighs,*neighs,*n_shared,**shared; 7287 PetscMPIInt size,rank,color; 7288 PetscInt *xadj,*adjncy; 7289 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7290 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7291 PetscInt void_procs,*procs_candidates = NULL; 7292 PetscInt xadj_count,*count; 7293 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7294 PetscSubcomm psubcomm; 7295 MPI_Comm subcomm; 7296 PetscErrorCode ierr; 7297 7298 PetscFunctionBegin; 7299 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7300 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7301 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7302 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7303 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7304 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains); 7305 7306 if (have_void) *have_void = PETSC_FALSE; 7307 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 7308 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 7309 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 7310 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 7311 im_active = !!n; 7312 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7313 void_procs = size - active_procs; 7314 /* get ranks of of non-active processes in mat communicator */ 7315 if (void_procs) { 7316 PetscInt ncand; 7317 7318 if (have_void) *have_void = PETSC_TRUE; 7319 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 7320 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7321 for (i=0,ncand=0;i<size;i++) { 7322 if (!procs_candidates[i]) { 7323 procs_candidates[ncand++] = i; 7324 } 7325 } 7326 /* force n_subdomains to be not greater that the number of non-active processes */ 7327 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7328 } 7329 7330 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7331 number of subdomains requested 1 -> send to master or first candidate in voids */ 7332 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 7333 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7334 PetscInt issize,isidx,dest; 7335 if (*n_subdomains == 1) dest = 0; 7336 else dest = rank; 7337 if (im_active) { 7338 issize = 1; 7339 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7340 isidx = procs_candidates[dest]; 7341 } else { 7342 isidx = dest; 7343 } 7344 } else { 7345 issize = 0; 7346 isidx = -1; 7347 } 7348 if (*n_subdomains != 1) *n_subdomains = active_procs; 7349 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 7350 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7351 PetscFunctionReturn(0); 7352 } 7353 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 7354 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 7355 threshold = PetscMax(threshold,2); 7356 7357 /* Get info on mapping */ 7358 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7359 7360 /* build local CSR graph of subdomains' connectivity */ 7361 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 7362 xadj[0] = 0; 7363 xadj[1] = PetscMax(n_neighs-1,0); 7364 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 7365 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 7366 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 7367 for (i=1;i<n_neighs;i++) 7368 for (j=0;j<n_shared[i];j++) 7369 count[shared[i][j]] += 1; 7370 7371 xadj_count = 0; 7372 for (i=1;i<n_neighs;i++) { 7373 for (j=0;j<n_shared[i];j++) { 7374 if (count[shared[i][j]] < threshold) { 7375 adjncy[xadj_count] = neighs[i]; 7376 adjncy_wgt[xadj_count] = n_shared[i]; 7377 xadj_count++; 7378 break; 7379 } 7380 } 7381 } 7382 xadj[1] = xadj_count; 7383 ierr = PetscFree(count);CHKERRQ(ierr); 7384 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7385 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7386 7387 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 7388 7389 /* Restrict work on active processes only */ 7390 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 7391 if (void_procs) { 7392 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 7393 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 7394 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 7395 subcomm = PetscSubcommChild(psubcomm); 7396 } else { 7397 psubcomm = NULL; 7398 subcomm = PetscObjectComm((PetscObject)mat); 7399 } 7400 7401 v_wgt = NULL; 7402 if (!color) { 7403 ierr = PetscFree(xadj);CHKERRQ(ierr); 7404 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7405 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7406 } else { 7407 Mat subdomain_adj; 7408 IS new_ranks,new_ranks_contig; 7409 MatPartitioning partitioner; 7410 PetscInt rstart=0,rend=0; 7411 PetscInt *is_indices,*oldranks; 7412 PetscMPIInt size; 7413 PetscBool aggregate; 7414 7415 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 7416 if (void_procs) { 7417 PetscInt prank = rank; 7418 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 7419 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 7420 for (i=0;i<xadj[1];i++) { 7421 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 7422 } 7423 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7424 } else { 7425 oldranks = NULL; 7426 } 7427 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7428 if (aggregate) { /* TODO: all this part could be made more efficient */ 7429 PetscInt lrows,row,ncols,*cols; 7430 PetscMPIInt nrank; 7431 PetscScalar *vals; 7432 7433 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 7434 lrows = 0; 7435 if (nrank<redprocs) { 7436 lrows = size/redprocs; 7437 if (nrank<size%redprocs) lrows++; 7438 } 7439 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 7440 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 7441 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7442 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7443 row = nrank; 7444 ncols = xadj[1]-xadj[0]; 7445 cols = adjncy; 7446 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 7447 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7448 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 7449 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7450 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7451 ierr = PetscFree(xadj);CHKERRQ(ierr); 7452 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7453 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7454 ierr = PetscFree(vals);CHKERRQ(ierr); 7455 if (use_vwgt) { 7456 Vec v; 7457 const PetscScalar *array; 7458 PetscInt nl; 7459 7460 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 7461 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 7462 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 7463 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 7464 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 7465 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 7466 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 7467 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7468 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 7469 ierr = VecDestroy(&v);CHKERRQ(ierr); 7470 } 7471 } else { 7472 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 7473 if (use_vwgt) { 7474 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 7475 v_wgt[0] = n; 7476 } 7477 } 7478 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 7479 7480 /* Partition */ 7481 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 7482 #if defined(PETSC_HAVE_PTSCOTCH) 7483 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH);CHKERRQ(ierr); 7484 #elif defined(PETSC_HAVE_PARMETIS) 7485 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS);CHKERRQ(ierr); 7486 #else 7487 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE);CHKERRQ(ierr); 7488 #endif 7489 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 7490 if (v_wgt) { 7491 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 7492 } 7493 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7494 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 7495 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 7496 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 7497 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 7498 7499 /* renumber new_ranks to avoid "holes" in new set of processors */ 7500 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 7501 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 7502 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7503 if (!aggregate) { 7504 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7505 #if defined(PETSC_USE_DEBUG) 7506 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7507 #endif 7508 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7509 } else if (oldranks) { 7510 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7511 } else { 7512 ranks_send_to_idx[0] = is_indices[0]; 7513 } 7514 } else { 7515 PetscInt idx = 0; 7516 PetscMPIInt tag; 7517 MPI_Request *reqs; 7518 7519 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 7520 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 7521 for (i=rstart;i<rend;i++) { 7522 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 7523 } 7524 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 7525 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7526 ierr = PetscFree(reqs);CHKERRQ(ierr); 7527 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7528 #if defined(PETSC_USE_DEBUG) 7529 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7530 #endif 7531 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7532 } else if (oldranks) { 7533 ranks_send_to_idx[0] = oldranks[idx]; 7534 } else { 7535 ranks_send_to_idx[0] = idx; 7536 } 7537 } 7538 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7539 /* clean up */ 7540 ierr = PetscFree(oldranks);CHKERRQ(ierr); 7541 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 7542 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 7543 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7544 } 7545 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7546 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7547 7548 /* assemble parallel IS for sends */ 7549 i = 1; 7550 if (!color) i=0; 7551 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7552 PetscFunctionReturn(0); 7553 } 7554 7555 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7556 7557 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7558 { 7559 Mat local_mat; 7560 IS is_sends_internal; 7561 PetscInt rows,cols,new_local_rows; 7562 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7563 PetscBool ismatis,isdense,newisdense,destroy_mat; 7564 ISLocalToGlobalMapping l2gmap; 7565 PetscInt* l2gmap_indices; 7566 const PetscInt* is_indices; 7567 MatType new_local_type; 7568 /* buffers */ 7569 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7570 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7571 PetscInt *recv_buffer_idxs_local; 7572 PetscScalar *ptr_vals,*recv_buffer_vals; 7573 const PetscScalar *send_buffer_vals; 7574 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7575 /* MPI */ 7576 MPI_Comm comm,comm_n; 7577 PetscSubcomm subcomm; 7578 PetscMPIInt n_sends,n_recvs,size; 7579 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7580 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7581 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7582 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7583 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7584 PetscErrorCode ierr; 7585 7586 PetscFunctionBegin; 7587 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7588 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7589 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7590 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7591 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7592 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7593 PetscValidLogicalCollectiveBool(mat,reuse,6); 7594 PetscValidLogicalCollectiveInt(mat,nis,8); 7595 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7596 if (nvecs) { 7597 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7598 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7599 } 7600 /* further checks */ 7601 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7602 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7603 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7604 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7605 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7606 if (reuse && *mat_n) { 7607 PetscInt mrows,mcols,mnrows,mncols; 7608 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7609 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7610 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7611 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7612 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7613 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7614 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7615 } 7616 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7617 PetscValidLogicalCollectiveInt(mat,bs,0); 7618 7619 /* prepare IS for sending if not provided */ 7620 if (!is_sends) { 7621 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7622 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7623 } else { 7624 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7625 is_sends_internal = is_sends; 7626 } 7627 7628 /* get comm */ 7629 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7630 7631 /* compute number of sends */ 7632 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7633 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7634 7635 /* compute number of receives */ 7636 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 7637 ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr); 7638 ierr = PetscArrayzero(iflags,size);CHKERRQ(ierr); 7639 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7640 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7641 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7642 ierr = PetscFree(iflags);CHKERRQ(ierr); 7643 7644 /* restrict comm if requested */ 7645 subcomm = 0; 7646 destroy_mat = PETSC_FALSE; 7647 if (restrict_comm) { 7648 PetscMPIInt color,subcommsize; 7649 7650 color = 0; 7651 if (restrict_full) { 7652 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7653 } else { 7654 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7655 } 7656 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7657 subcommsize = size - subcommsize; 7658 /* check if reuse has been requested */ 7659 if (reuse) { 7660 if (*mat_n) { 7661 PetscMPIInt subcommsize2; 7662 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7663 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7664 comm_n = PetscObjectComm((PetscObject)*mat_n); 7665 } else { 7666 comm_n = PETSC_COMM_SELF; 7667 } 7668 } else { /* MAT_INITIAL_MATRIX */ 7669 PetscMPIInt rank; 7670 7671 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7672 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7673 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7674 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7675 comm_n = PetscSubcommChild(subcomm); 7676 } 7677 /* flag to destroy *mat_n if not significative */ 7678 if (color) destroy_mat = PETSC_TRUE; 7679 } else { 7680 comm_n = comm; 7681 } 7682 7683 /* prepare send/receive buffers */ 7684 ierr = PetscMalloc1(size,&ilengths_idxs);CHKERRQ(ierr); 7685 ierr = PetscArrayzero(ilengths_idxs,size);CHKERRQ(ierr); 7686 ierr = PetscMalloc1(size,&ilengths_vals);CHKERRQ(ierr); 7687 ierr = PetscArrayzero(ilengths_vals,size);CHKERRQ(ierr); 7688 if (nis) { 7689 ierr = PetscCalloc1(size,&ilengths_idxs_is);CHKERRQ(ierr); 7690 } 7691 7692 /* Get data from local matrices */ 7693 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7694 /* TODO: See below some guidelines on how to prepare the local buffers */ 7695 /* 7696 send_buffer_vals should contain the raw values of the local matrix 7697 send_buffer_idxs should contain: 7698 - MatType_PRIVATE type 7699 - PetscInt size_of_l2gmap 7700 - PetscInt global_row_indices[size_of_l2gmap] 7701 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7702 */ 7703 else { 7704 ierr = MatDenseGetArrayRead(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7705 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7706 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7707 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7708 send_buffer_idxs[1] = i; 7709 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7710 ierr = PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i);CHKERRQ(ierr); 7711 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7712 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7713 for (i=0;i<n_sends;i++) { 7714 ilengths_vals[is_indices[i]] = len*len; 7715 ilengths_idxs[is_indices[i]] = len+2; 7716 } 7717 } 7718 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7719 /* additional is (if any) */ 7720 if (nis) { 7721 PetscMPIInt psum; 7722 PetscInt j; 7723 for (j=0,psum=0;j<nis;j++) { 7724 PetscInt plen; 7725 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7726 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7727 psum += len+1; /* indices + lenght */ 7728 } 7729 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7730 for (j=0,psum=0;j<nis;j++) { 7731 PetscInt plen; 7732 const PetscInt *is_array_idxs; 7733 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7734 send_buffer_idxs_is[psum] = plen; 7735 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7736 ierr = PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen);CHKERRQ(ierr); 7737 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7738 psum += plen+1; /* indices + lenght */ 7739 } 7740 for (i=0;i<n_sends;i++) { 7741 ilengths_idxs_is[is_indices[i]] = psum; 7742 } 7743 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7744 } 7745 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7746 7747 buf_size_idxs = 0; 7748 buf_size_vals = 0; 7749 buf_size_idxs_is = 0; 7750 buf_size_vecs = 0; 7751 for (i=0;i<n_recvs;i++) { 7752 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7753 buf_size_vals += (PetscInt)olengths_vals[i]; 7754 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7755 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7756 } 7757 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7758 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7759 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7760 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7761 7762 /* get new tags for clean communications */ 7763 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7764 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7765 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7766 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7767 7768 /* allocate for requests */ 7769 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7770 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7771 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7772 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7773 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7774 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7775 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7776 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7777 7778 /* communications */ 7779 ptr_idxs = recv_buffer_idxs; 7780 ptr_vals = recv_buffer_vals; 7781 ptr_idxs_is = recv_buffer_idxs_is; 7782 ptr_vecs = recv_buffer_vecs; 7783 for (i=0;i<n_recvs;i++) { 7784 source_dest = onodes[i]; 7785 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7786 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7787 ptr_idxs += olengths_idxs[i]; 7788 ptr_vals += olengths_vals[i]; 7789 if (nis) { 7790 source_dest = onodes_is[i]; 7791 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7792 ptr_idxs_is += olengths_idxs_is[i]; 7793 } 7794 if (nvecs) { 7795 source_dest = onodes[i]; 7796 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7797 ptr_vecs += olengths_idxs[i]-2; 7798 } 7799 } 7800 for (i=0;i<n_sends;i++) { 7801 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7802 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7803 ierr = MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7804 if (nis) { 7805 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7806 } 7807 if (nvecs) { 7808 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7809 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7810 } 7811 } 7812 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7813 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7814 7815 /* assemble new l2g map */ 7816 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7817 ptr_idxs = recv_buffer_idxs; 7818 new_local_rows = 0; 7819 for (i=0;i<n_recvs;i++) { 7820 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7821 ptr_idxs += olengths_idxs[i]; 7822 } 7823 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7824 ptr_idxs = recv_buffer_idxs; 7825 new_local_rows = 0; 7826 for (i=0;i<n_recvs;i++) { 7827 ierr = PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1));CHKERRQ(ierr); 7828 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7829 ptr_idxs += olengths_idxs[i]; 7830 } 7831 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7832 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7833 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7834 7835 /* infer new local matrix type from received local matrices type */ 7836 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7837 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7838 if (n_recvs) { 7839 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7840 ptr_idxs = recv_buffer_idxs; 7841 for (i=0;i<n_recvs;i++) { 7842 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7843 new_local_type_private = MATAIJ_PRIVATE; 7844 break; 7845 } 7846 ptr_idxs += olengths_idxs[i]; 7847 } 7848 switch (new_local_type_private) { 7849 case MATDENSE_PRIVATE: 7850 new_local_type = MATSEQAIJ; 7851 bs = 1; 7852 break; 7853 case MATAIJ_PRIVATE: 7854 new_local_type = MATSEQAIJ; 7855 bs = 1; 7856 break; 7857 case MATBAIJ_PRIVATE: 7858 new_local_type = MATSEQBAIJ; 7859 break; 7860 case MATSBAIJ_PRIVATE: 7861 new_local_type = MATSEQSBAIJ; 7862 break; 7863 default: 7864 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7865 break; 7866 } 7867 } else { /* by default, new_local_type is seqaij */ 7868 new_local_type = MATSEQAIJ; 7869 bs = 1; 7870 } 7871 7872 /* create MATIS object if needed */ 7873 if (!reuse) { 7874 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7875 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7876 } else { 7877 /* it also destroys the local matrices */ 7878 if (*mat_n) { 7879 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7880 } else { /* this is a fake object */ 7881 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7882 } 7883 } 7884 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7885 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7886 7887 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7888 7889 /* Global to local map of received indices */ 7890 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7891 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7892 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7893 7894 /* restore attributes -> type of incoming data and its size */ 7895 buf_size_idxs = 0; 7896 for (i=0;i<n_recvs;i++) { 7897 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7898 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7899 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7900 } 7901 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7902 7903 /* set preallocation */ 7904 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7905 if (!newisdense) { 7906 PetscInt *new_local_nnz=0; 7907 7908 ptr_idxs = recv_buffer_idxs_local; 7909 if (n_recvs) { 7910 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7911 } 7912 for (i=0;i<n_recvs;i++) { 7913 PetscInt j; 7914 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7915 for (j=0;j<*(ptr_idxs+1);j++) { 7916 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7917 } 7918 } else { 7919 /* TODO */ 7920 } 7921 ptr_idxs += olengths_idxs[i]; 7922 } 7923 if (new_local_nnz) { 7924 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7925 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7926 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7927 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7928 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7929 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7930 } else { 7931 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7932 } 7933 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7934 } else { 7935 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7936 } 7937 7938 /* set values */ 7939 ptr_vals = recv_buffer_vals; 7940 ptr_idxs = recv_buffer_idxs_local; 7941 for (i=0;i<n_recvs;i++) { 7942 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7943 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7944 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7945 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7946 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7947 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7948 } else { 7949 /* TODO */ 7950 } 7951 ptr_idxs += olengths_idxs[i]; 7952 ptr_vals += olengths_vals[i]; 7953 } 7954 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7955 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7956 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7957 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7958 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7959 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7960 7961 #if 0 7962 if (!restrict_comm) { /* check */ 7963 Vec lvec,rvec; 7964 PetscReal infty_error; 7965 7966 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7967 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7968 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7969 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7970 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7971 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7972 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7973 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7974 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7975 } 7976 #endif 7977 7978 /* assemble new additional is (if any) */ 7979 if (nis) { 7980 PetscInt **temp_idxs,*count_is,j,psum; 7981 7982 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7983 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7984 ptr_idxs = recv_buffer_idxs_is; 7985 psum = 0; 7986 for (i=0;i<n_recvs;i++) { 7987 for (j=0;j<nis;j++) { 7988 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7989 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7990 psum += plen; 7991 ptr_idxs += plen+1; /* shift pointer to received data */ 7992 } 7993 } 7994 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7995 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7996 for (i=1;i<nis;i++) { 7997 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7998 } 7999 ierr = PetscArrayzero(count_is,nis);CHKERRQ(ierr); 8000 ptr_idxs = recv_buffer_idxs_is; 8001 for (i=0;i<n_recvs;i++) { 8002 for (j=0;j<nis;j++) { 8003 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8004 ierr = PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen);CHKERRQ(ierr); 8005 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 8006 ptr_idxs += plen+1; /* shift pointer to received data */ 8007 } 8008 } 8009 for (i=0;i<nis;i++) { 8010 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8011 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr); 8012 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 8013 } 8014 ierr = PetscFree(count_is);CHKERRQ(ierr); 8015 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 8016 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 8017 } 8018 /* free workspace */ 8019 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 8020 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8021 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 8022 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8023 if (isdense) { 8024 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 8025 ierr = MatDenseRestoreArrayRead(local_mat,&send_buffer_vals);CHKERRQ(ierr); 8026 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 8027 } else { 8028 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 8029 } 8030 if (nis) { 8031 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8032 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 8033 } 8034 8035 if (nvecs) { 8036 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8037 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8038 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8039 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 8040 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 8041 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 8042 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 8043 /* set values */ 8044 ptr_vals = recv_buffer_vecs; 8045 ptr_idxs = recv_buffer_idxs_local; 8046 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8047 for (i=0;i<n_recvs;i++) { 8048 PetscInt j; 8049 for (j=0;j<*(ptr_idxs+1);j++) { 8050 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 8051 } 8052 ptr_idxs += olengths_idxs[i]; 8053 ptr_vals += olengths_idxs[i]-2; 8054 } 8055 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8056 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 8057 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 8058 } 8059 8060 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 8061 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 8062 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 8063 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 8064 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 8065 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 8066 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 8067 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 8068 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 8069 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 8070 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 8071 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 8072 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 8073 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 8074 ierr = PetscFree(onodes);CHKERRQ(ierr); 8075 if (nis) { 8076 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 8077 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 8078 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 8079 } 8080 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 8081 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 8082 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 8083 for (i=0;i<nis;i++) { 8084 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8085 } 8086 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8087 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 8088 } 8089 *mat_n = NULL; 8090 } 8091 PetscFunctionReturn(0); 8092 } 8093 8094 /* temporary hack into ksp private data structure */ 8095 #include <petsc/private/kspimpl.h> 8096 8097 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 8098 { 8099 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 8100 PC_IS *pcis = (PC_IS*)pc->data; 8101 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 8102 Mat coarsedivudotp = NULL; 8103 Mat coarseG,t_coarse_mat_is; 8104 MatNullSpace CoarseNullSpace = NULL; 8105 ISLocalToGlobalMapping coarse_islg; 8106 IS coarse_is,*isarray,corners; 8107 PetscInt i,im_active=-1,active_procs=-1; 8108 PetscInt nis,nisdofs,nisneu,nisvert; 8109 PetscInt coarse_eqs_per_proc; 8110 PC pc_temp; 8111 PCType coarse_pc_type; 8112 KSPType coarse_ksp_type; 8113 PetscBool multilevel_requested,multilevel_allowed; 8114 PetscBool coarse_reuse; 8115 PetscInt ncoarse,nedcfield; 8116 PetscBool compute_vecs = PETSC_FALSE; 8117 PetscScalar *array; 8118 MatReuse coarse_mat_reuse; 8119 PetscBool restr, full_restr, have_void; 8120 PetscMPIInt size; 8121 PetscErrorCode ierr; 8122 8123 PetscFunctionBegin; 8124 ierr = PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8125 /* Assign global numbering to coarse dofs */ 8126 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8127 PetscInt ocoarse_size; 8128 compute_vecs = PETSC_TRUE; 8129 8130 pcbddc->new_primal_space = PETSC_TRUE; 8131 ocoarse_size = pcbddc->coarse_size; 8132 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 8133 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 8134 /* see if we can avoid some work */ 8135 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8136 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8137 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8138 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 8139 coarse_reuse = PETSC_FALSE; 8140 } else { /* we can safely reuse already computed coarse matrix */ 8141 coarse_reuse = PETSC_TRUE; 8142 } 8143 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8144 coarse_reuse = PETSC_FALSE; 8145 } 8146 /* reset any subassembling information */ 8147 if (!coarse_reuse || pcbddc->recompute_topography) { 8148 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 8149 } 8150 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8151 coarse_reuse = PETSC_TRUE; 8152 } 8153 if (coarse_reuse && pcbddc->coarse_ksp) { 8154 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 8155 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 8156 coarse_mat_reuse = MAT_REUSE_MATRIX; 8157 } else { 8158 coarse_mat = NULL; 8159 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8160 } 8161 8162 /* creates temporary l2gmap and IS for coarse indexes */ 8163 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 8164 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 8165 8166 /* creates temporary MATIS object for coarse matrix */ 8167 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 8168 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 8169 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 8170 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8171 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8172 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 8173 8174 /* count "active" (i.e. with positive local size) and "void" processes */ 8175 im_active = !!(pcis->n); 8176 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8177 8178 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8179 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8180 /* full_restr : just use the receivers from the subassembling pattern */ 8181 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 8182 coarse_mat_is = NULL; 8183 multilevel_allowed = PETSC_FALSE; 8184 multilevel_requested = PETSC_FALSE; 8185 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 8186 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 8187 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8188 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8189 if (multilevel_requested) { 8190 ncoarse = active_procs/pcbddc->coarsening_ratio; 8191 restr = PETSC_FALSE; 8192 full_restr = PETSC_FALSE; 8193 } else { 8194 ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc); 8195 restr = PETSC_TRUE; 8196 full_restr = PETSC_TRUE; 8197 } 8198 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8199 ncoarse = PetscMax(1,ncoarse); 8200 if (!pcbddc->coarse_subassembling) { 8201 if (pcbddc->coarsening_ratio > 1) { 8202 if (multilevel_requested) { 8203 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 8204 } else { 8205 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 8206 } 8207 } else { 8208 PetscMPIInt rank; 8209 8210 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 8211 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 8212 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 8213 } 8214 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8215 PetscInt psum; 8216 if (pcbddc->coarse_ksp) psum = 1; 8217 else psum = 0; 8218 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8219 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8220 } 8221 /* determine if we can go multilevel */ 8222 if (multilevel_requested) { 8223 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8224 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8225 } 8226 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8227 8228 /* dump subassembling pattern */ 8229 if (pcbddc->dbg_flag && multilevel_allowed) { 8230 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 8231 } 8232 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8233 nedcfield = -1; 8234 corners = NULL; 8235 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8236 PetscInt *tidxs,*tidxs2,nout,tsize,i; 8237 const PetscInt *idxs; 8238 ISLocalToGlobalMapping tmap; 8239 8240 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8241 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 8242 /* allocate space for temporary storage */ 8243 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 8244 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 8245 /* allocate for IS array */ 8246 nisdofs = pcbddc->n_ISForDofsLocal; 8247 if (pcbddc->nedclocal) { 8248 if (pcbddc->nedfield > -1) { 8249 nedcfield = pcbddc->nedfield; 8250 } else { 8251 nedcfield = 0; 8252 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs); 8253 nisdofs = 1; 8254 } 8255 } 8256 nisneu = !!pcbddc->NeumannBoundariesLocal; 8257 nisvert = 0; /* nisvert is not used */ 8258 nis = nisdofs + nisneu + nisvert; 8259 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 8260 /* dofs splitting */ 8261 for (i=0;i<nisdofs;i++) { 8262 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 8263 if (nedcfield != i) { 8264 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 8265 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8266 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8267 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8268 } else { 8269 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 8270 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8271 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8272 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout); 8273 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8274 } 8275 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8276 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 8277 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 8278 } 8279 /* neumann boundaries */ 8280 if (pcbddc->NeumannBoundariesLocal) { 8281 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 8282 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 8283 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8284 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8285 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8286 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8287 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 8288 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 8289 } 8290 /* coordinates */ 8291 if (pcbddc->corner_selected) { 8292 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);CHKERRQ(ierr); 8293 ierr = ISGetLocalSize(corners,&tsize);CHKERRQ(ierr); 8294 ierr = ISGetIndices(corners,&idxs);CHKERRQ(ierr); 8295 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8296 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %D != %D",tsize,nout); 8297 ierr = ISRestoreIndices(corners,&idxs);CHKERRQ(ierr); 8298 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);CHKERRQ(ierr); 8299 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8300 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners);CHKERRQ(ierr); 8301 } 8302 ierr = PetscFree(tidxs);CHKERRQ(ierr); 8303 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 8304 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 8305 } else { 8306 nis = 0; 8307 nisdofs = 0; 8308 nisneu = 0; 8309 nisvert = 0; 8310 isarray = NULL; 8311 } 8312 /* destroy no longer needed map */ 8313 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 8314 8315 /* subassemble */ 8316 if (multilevel_allowed) { 8317 Vec vp[1]; 8318 PetscInt nvecs = 0; 8319 PetscBool reuse,reuser; 8320 8321 if (coarse_mat) reuse = PETSC_TRUE; 8322 else reuse = PETSC_FALSE; 8323 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8324 vp[0] = NULL; 8325 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8326 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 8327 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 8328 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 8329 nvecs = 1; 8330 8331 if (pcbddc->divudotp) { 8332 Mat B,loc_divudotp; 8333 Vec v,p; 8334 IS dummy; 8335 PetscInt np; 8336 8337 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 8338 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 8339 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 8340 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 8341 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 8342 ierr = VecSet(p,1.);CHKERRQ(ierr); 8343 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 8344 ierr = VecDestroy(&p);CHKERRQ(ierr); 8345 ierr = MatDestroy(&B);CHKERRQ(ierr); 8346 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 8347 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 8348 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 8349 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 8350 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 8351 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8352 ierr = VecDestroy(&v);CHKERRQ(ierr); 8353 } 8354 } 8355 if (reuser) { 8356 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8357 } else { 8358 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8359 } 8360 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8361 PetscScalar *arraym; 8362 const PetscScalar *arrayv; 8363 PetscInt nl; 8364 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 8365 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 8366 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8367 ierr = VecGetArrayRead(vp[0],&arrayv);CHKERRQ(ierr); 8368 ierr = PetscArraycpy(arraym,arrayv,nl);CHKERRQ(ierr); 8369 ierr = VecRestoreArrayRead(vp[0],&arrayv);CHKERRQ(ierr); 8370 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8371 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 8372 } else { 8373 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 8374 } 8375 } else { 8376 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 8377 } 8378 if (coarse_mat_is || coarse_mat) { 8379 if (!multilevel_allowed) { 8380 ierr = MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 8381 } else { 8382 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8383 if (coarse_mat_is) { 8384 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8385 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 8386 coarse_mat = coarse_mat_is; 8387 } 8388 } 8389 } 8390 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 8391 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 8392 8393 /* create local to global scatters for coarse problem */ 8394 if (compute_vecs) { 8395 PetscInt lrows; 8396 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 8397 if (coarse_mat) { 8398 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 8399 } else { 8400 lrows = 0; 8401 } 8402 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 8403 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 8404 ierr = VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);CHKERRQ(ierr); 8405 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8406 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8407 } 8408 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 8409 8410 /* set defaults for coarse KSP and PC */ 8411 if (multilevel_allowed) { 8412 coarse_ksp_type = KSPRICHARDSON; 8413 coarse_pc_type = PCBDDC; 8414 } else { 8415 coarse_ksp_type = KSPPREONLY; 8416 coarse_pc_type = PCREDUNDANT; 8417 } 8418 8419 /* print some info if requested */ 8420 if (pcbddc->dbg_flag) { 8421 if (!multilevel_allowed) { 8422 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8423 if (multilevel_requested) { 8424 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8425 } else if (pcbddc->max_levels) { 8426 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);CHKERRQ(ierr); 8427 } 8428 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8429 } 8430 } 8431 8432 /* communicate coarse discrete gradient */ 8433 coarseG = NULL; 8434 if (pcbddc->nedcG && multilevel_allowed) { 8435 MPI_Comm ccomm; 8436 if (coarse_mat) { 8437 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8438 } else { 8439 ccomm = MPI_COMM_NULL; 8440 } 8441 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 8442 } 8443 8444 /* create the coarse KSP object only once with defaults */ 8445 if (coarse_mat) { 8446 PetscBool isredundant,isbddc,force,valid; 8447 PetscViewer dbg_viewer = NULL; 8448 8449 if (pcbddc->dbg_flag) { 8450 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8451 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8452 } 8453 if (!pcbddc->coarse_ksp) { 8454 char prefix[256],str_level[16]; 8455 size_t len; 8456 8457 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 8458 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8459 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 8460 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 8461 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8462 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 8463 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 8464 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8465 /* TODO is this logic correct? should check for coarse_mat type */ 8466 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8467 /* prefix */ 8468 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 8469 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 8470 if (!pcbddc->current_level) { 8471 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));CHKERRQ(ierr); 8472 ierr = PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));CHKERRQ(ierr); 8473 } else { 8474 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 8475 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8476 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8477 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8478 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 8479 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 8480 ierr = PetscStrlcat(prefix,str_level,sizeof(prefix));CHKERRQ(ierr); 8481 } 8482 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 8483 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8484 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8485 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8486 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8487 /* allow user customization */ 8488 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 8489 /* get some info after set from options */ 8490 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8491 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8492 force = PETSC_FALSE; 8493 ierr = PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);CHKERRQ(ierr); 8494 ierr = PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");CHKERRQ(ierr); 8495 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8496 if (multilevel_allowed && !force && !valid) { 8497 isbddc = PETSC_TRUE; 8498 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8499 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8500 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8501 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8502 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8503 ierr = PetscObjectOptionsBegin((PetscObject)pc_temp);CHKERRQ(ierr); 8504 ierr = (*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp);CHKERRQ(ierr); 8505 ierr = PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp);CHKERRQ(ierr); 8506 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8507 pc_temp->setfromoptionscalled++; 8508 } 8509 } 8510 } 8511 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8512 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8513 if (nisdofs) { 8514 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 8515 for (i=0;i<nisdofs;i++) { 8516 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8517 } 8518 } 8519 if (nisneu) { 8520 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 8521 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 8522 } 8523 if (nisvert) { 8524 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 8525 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 8526 } 8527 if (coarseG) { 8528 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 8529 } 8530 8531 /* get some info after set from options */ 8532 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8533 8534 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8535 if (isbddc && !multilevel_allowed) { 8536 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8537 } 8538 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8539 force = PETSC_FALSE; 8540 ierr = PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);CHKERRQ(ierr); 8541 ierr = PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");CHKERRQ(ierr); 8542 if (multilevel_requested && multilevel_allowed && !valid && !force) { 8543 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8544 } 8545 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 8546 if (isredundant) { 8547 KSP inner_ksp; 8548 PC inner_pc; 8549 8550 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 8551 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 8552 } 8553 8554 /* parameters which miss an API */ 8555 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8556 if (isbddc) { 8557 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8558 8559 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8560 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8561 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8562 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8563 if (pcbddc_coarse->benign_saddle_point) { 8564 Mat coarsedivudotp_is; 8565 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8566 IS row,col; 8567 const PetscInt *gidxs; 8568 PetscInt n,st,M,N; 8569 8570 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 8571 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 8572 st = st-n; 8573 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 8574 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8575 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8576 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8577 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8578 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8579 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8580 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8581 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8582 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8583 ierr = ISDestroy(&row);CHKERRQ(ierr); 8584 ierr = ISDestroy(&col);CHKERRQ(ierr); 8585 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8586 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8587 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8588 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8589 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8590 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8591 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8592 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8593 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8594 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8595 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8596 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8597 } 8598 } 8599 8600 /* propagate symmetry info of coarse matrix */ 8601 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8602 if (pc->pmat->symmetric_set) { 8603 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8604 } 8605 if (pc->pmat->hermitian_set) { 8606 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8607 } 8608 if (pc->pmat->spd_set) { 8609 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8610 } 8611 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8612 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8613 } 8614 /* set operators */ 8615 ierr = MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");CHKERRQ(ierr); 8616 ierr = MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 8617 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8618 if (pcbddc->dbg_flag) { 8619 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8620 } 8621 } 8622 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8623 ierr = PetscFree(isarray);CHKERRQ(ierr); 8624 #if 0 8625 { 8626 PetscViewer viewer; 8627 char filename[256]; 8628 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8629 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8630 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8631 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8632 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8633 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8634 } 8635 #endif 8636 8637 if (corners) { 8638 Vec gv; 8639 IS is; 8640 const PetscInt *idxs; 8641 PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim; 8642 PetscScalar *coords; 8643 8644 if (!pcbddc->mat_graph->cloc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates"); 8645 ierr = VecGetSize(pcbddc->coarse_vec,&N);CHKERRQ(ierr); 8646 ierr = VecGetLocalSize(pcbddc->coarse_vec,&n);CHKERRQ(ierr); 8647 ierr = VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv);CHKERRQ(ierr); 8648 ierr = VecSetBlockSize(gv,cdim);CHKERRQ(ierr); 8649 ierr = VecSetSizes(gv,n*cdim,N*cdim);CHKERRQ(ierr); 8650 ierr = VecSetType(gv,VECSTANDARD);CHKERRQ(ierr); 8651 ierr = VecSetFromOptions(gv);CHKERRQ(ierr); 8652 ierr = VecSet(gv,PETSC_MAX_REAL);CHKERRQ(ierr); /* we only propagate coordinates from vertices constraints */ 8653 8654 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);CHKERRQ(ierr); 8655 ierr = ISGetLocalSize(is,&n);CHKERRQ(ierr); 8656 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 8657 ierr = PetscMalloc1(n*cdim,&coords);CHKERRQ(ierr); 8658 for (i=0;i<n;i++) { 8659 for (d=0;d<cdim;d++) { 8660 coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d]; 8661 } 8662 } 8663 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 8664 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);CHKERRQ(ierr); 8665 8666 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 8667 ierr = ISGetIndices(corners,&idxs);CHKERRQ(ierr); 8668 ierr = VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES);CHKERRQ(ierr); 8669 ierr = ISRestoreIndices(corners,&idxs);CHKERRQ(ierr); 8670 ierr = PetscFree(coords);CHKERRQ(ierr); 8671 ierr = VecAssemblyBegin(gv);CHKERRQ(ierr); 8672 ierr = VecAssemblyEnd(gv);CHKERRQ(ierr); 8673 ierr = VecGetArray(gv,&coords);CHKERRQ(ierr); 8674 if (pcbddc->coarse_ksp) { 8675 PC coarse_pc; 8676 PetscBool isbddc; 8677 8678 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 8679 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 8680 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8681 PetscReal *realcoords; 8682 8683 ierr = VecGetLocalSize(gv,&n);CHKERRQ(ierr); 8684 #if defined(PETSC_USE_COMPLEX) 8685 ierr = PetscMalloc1(n,&realcoords);CHKERRQ(ierr); 8686 for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]); 8687 #else 8688 realcoords = coords; 8689 #endif 8690 ierr = PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords);CHKERRQ(ierr); 8691 #if defined(PETSC_USE_COMPLEX) 8692 ierr = PetscFree(realcoords);CHKERRQ(ierr); 8693 #endif 8694 } 8695 } 8696 ierr = VecRestoreArray(gv,&coords);CHKERRQ(ierr); 8697 ierr = VecDestroy(&gv);CHKERRQ(ierr); 8698 } 8699 ierr = ISDestroy(&corners);CHKERRQ(ierr); 8700 8701 if (pcbddc->coarse_ksp) { 8702 Vec crhs,csol; 8703 8704 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8705 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8706 if (!csol) { 8707 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8708 } 8709 if (!crhs) { 8710 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8711 } 8712 } 8713 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8714 8715 /* compute null space for coarse solver if the benign trick has been requested */ 8716 if (pcbddc->benign_null) { 8717 8718 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8719 for (i=0;i<pcbddc->benign_n;i++) { 8720 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8721 } 8722 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8723 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8724 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8725 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8726 if (coarse_mat) { 8727 Vec nullv; 8728 PetscScalar *array,*array2; 8729 PetscInt nl; 8730 8731 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8732 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8733 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8734 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8735 ierr = PetscArraycpy(array2,array,nl);CHKERRQ(ierr); 8736 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8737 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8738 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8739 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8740 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8741 } 8742 } 8743 ierr = PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8744 8745 ierr = PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8746 if (pcbddc->coarse_ksp) { 8747 PetscBool ispreonly; 8748 8749 if (CoarseNullSpace) { 8750 PetscBool isnull; 8751 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8752 if (isnull) { 8753 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8754 } 8755 /* TODO: add local nullspaces (if any) */ 8756 } 8757 /* setup coarse ksp */ 8758 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8759 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8760 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8761 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8762 KSP check_ksp; 8763 KSPType check_ksp_type; 8764 PC check_pc; 8765 Vec check_vec,coarse_vec; 8766 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8767 PetscInt its; 8768 PetscBool compute_eigs; 8769 PetscReal *eigs_r,*eigs_c; 8770 PetscInt neigs; 8771 const char *prefix; 8772 8773 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8774 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8775 ierr = PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);CHKERRQ(ierr); 8776 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 8777 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8778 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8779 /* prevent from setup unneeded object */ 8780 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8781 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8782 if (ispreonly) { 8783 check_ksp_type = KSPPREONLY; 8784 compute_eigs = PETSC_FALSE; 8785 } else { 8786 check_ksp_type = KSPGMRES; 8787 compute_eigs = PETSC_TRUE; 8788 } 8789 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8790 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8791 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8792 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8793 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8794 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8795 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8796 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8797 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8798 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8799 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8800 /* create random vec */ 8801 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8802 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8803 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8804 /* solve coarse problem */ 8805 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8806 ierr = KSPCheckSolve(check_ksp,pc,coarse_vec);CHKERRQ(ierr); 8807 /* set eigenvalue estimation if preonly has not been requested */ 8808 if (compute_eigs) { 8809 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8810 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8811 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8812 if (neigs) { 8813 lambda_max = eigs_r[neigs-1]; 8814 lambda_min = eigs_r[0]; 8815 if (pcbddc->use_coarse_estimates) { 8816 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8817 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8818 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8819 } 8820 } 8821 } 8822 } 8823 8824 /* check coarse problem residual error */ 8825 if (pcbddc->dbg_flag) { 8826 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8827 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8828 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8829 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8830 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8831 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8832 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8833 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8834 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8835 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8836 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8837 if (CoarseNullSpace) { 8838 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8839 } 8840 if (compute_eigs) { 8841 PetscReal lambda_max_s,lambda_min_s; 8842 KSPConvergedReason reason; 8843 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8844 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8845 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8846 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8847 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8848 for (i=0;i<neigs;i++) { 8849 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8850 } 8851 } 8852 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8853 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8854 } 8855 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8856 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8857 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8858 if (compute_eigs) { 8859 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8860 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8861 } 8862 } 8863 } 8864 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8865 /* print additional info */ 8866 if (pcbddc->dbg_flag) { 8867 /* waits until all processes reaches this point */ 8868 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8869 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);CHKERRQ(ierr); 8870 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8871 } 8872 8873 /* free memory */ 8874 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8875 ierr = PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8876 PetscFunctionReturn(0); 8877 } 8878 8879 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8880 { 8881 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8882 PC_IS* pcis = (PC_IS*)pc->data; 8883 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8884 IS subset,subset_mult,subset_n; 8885 PetscInt local_size,coarse_size=0; 8886 PetscInt *local_primal_indices=NULL; 8887 const PetscInt *t_local_primal_indices; 8888 PetscErrorCode ierr; 8889 8890 PetscFunctionBegin; 8891 /* Compute global number of coarse dofs */ 8892 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8893 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8894 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8895 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8896 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8897 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8898 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8899 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8900 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8901 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8902 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8903 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8904 ierr = PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size);CHKERRQ(ierr); 8905 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8906 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8907 8908 /* check numbering */ 8909 if (pcbddc->dbg_flag) { 8910 PetscScalar coarsesum,*array,*array2; 8911 PetscInt i; 8912 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8913 8914 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8915 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8916 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8917 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8918 /* counter */ 8919 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8920 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8921 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8922 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8923 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8924 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8925 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8926 for (i=0;i<pcbddc->local_primal_size;i++) { 8927 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8928 } 8929 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8930 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8931 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8932 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8933 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8934 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8935 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8936 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8937 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8938 for (i=0;i<pcis->n;i++) { 8939 if (array[i] != 0.0 && array[i] != array2[i]) { 8940 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8941 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8942 set_error = PETSC_TRUE; 8943 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8944 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8945 } 8946 } 8947 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8948 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8949 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8950 for (i=0;i<pcis->n;i++) { 8951 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8952 } 8953 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8954 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8955 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8956 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8957 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8958 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8959 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8960 PetscInt *gidxs; 8961 8962 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8963 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8964 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8965 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8966 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8967 for (i=0;i<pcbddc->local_primal_size;i++) { 8968 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8969 } 8970 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8971 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8972 } 8973 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8974 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8975 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8976 } 8977 8978 /* get back data */ 8979 *coarse_size_n = coarse_size; 8980 *local_primal_indices_n = local_primal_indices; 8981 PetscFunctionReturn(0); 8982 } 8983 8984 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8985 { 8986 IS localis_t; 8987 PetscInt i,lsize,*idxs,n; 8988 PetscScalar *vals; 8989 PetscErrorCode ierr; 8990 8991 PetscFunctionBegin; 8992 /* get indices in local ordering exploiting local to global map */ 8993 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8994 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8995 for (i=0;i<lsize;i++) vals[i] = 1.0; 8996 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8997 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8998 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8999 if (idxs) { /* multilevel guard */ 9000 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 9001 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 9002 } 9003 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 9004 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 9005 ierr = PetscFree(vals);CHKERRQ(ierr); 9006 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 9007 /* now compute set in local ordering */ 9008 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9009 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9010 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 9011 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 9012 for (i=0,lsize=0;i<n;i++) { 9013 if (PetscRealPart(vals[i]) > 0.5) { 9014 lsize++; 9015 } 9016 } 9017 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 9018 for (i=0,lsize=0;i<n;i++) { 9019 if (PetscRealPart(vals[i]) > 0.5) { 9020 idxs[lsize++] = i; 9021 } 9022 } 9023 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 9024 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 9025 *localis = localis_t; 9026 PetscFunctionReturn(0); 9027 } 9028 9029 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9030 { 9031 PC_IS *pcis=(PC_IS*)pc->data; 9032 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9033 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 9034 Mat S_j; 9035 PetscInt *used_xadj,*used_adjncy; 9036 PetscBool free_used_adj; 9037 PetscErrorCode ierr; 9038 9039 PetscFunctionBegin; 9040 ierr = PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 9041 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9042 free_used_adj = PETSC_FALSE; 9043 if (pcbddc->sub_schurs_layers == -1) { 9044 used_xadj = NULL; 9045 used_adjncy = NULL; 9046 } else { 9047 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9048 used_xadj = pcbddc->mat_graph->xadj; 9049 used_adjncy = pcbddc->mat_graph->adjncy; 9050 } else if (pcbddc->computed_rowadj) { 9051 used_xadj = pcbddc->mat_graph->xadj; 9052 used_adjncy = pcbddc->mat_graph->adjncy; 9053 } else { 9054 PetscBool flg_row=PETSC_FALSE; 9055 const PetscInt *xadj,*adjncy; 9056 PetscInt nvtxs; 9057 9058 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 9059 if (flg_row) { 9060 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 9061 ierr = PetscArraycpy(used_xadj,xadj,nvtxs+1);CHKERRQ(ierr); 9062 ierr = PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs]);CHKERRQ(ierr); 9063 free_used_adj = PETSC_TRUE; 9064 } else { 9065 pcbddc->sub_schurs_layers = -1; 9066 used_xadj = NULL; 9067 used_adjncy = NULL; 9068 } 9069 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 9070 } 9071 } 9072 9073 /* setup sub_schurs data */ 9074 ierr = MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 9075 if (!sub_schurs->schur_explicit) { 9076 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9077 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 9078 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 9079 } else { 9080 Mat change = NULL; 9081 Vec scaling = NULL; 9082 IS change_primal = NULL, iP; 9083 PetscInt benign_n; 9084 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9085 PetscBool need_change = PETSC_FALSE; 9086 PetscBool discrete_harmonic = PETSC_FALSE; 9087 9088 if (!pcbddc->use_vertices && reuse_solvers) { 9089 PetscInt n_vertices; 9090 9091 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 9092 reuse_solvers = (PetscBool)!n_vertices; 9093 } 9094 if (!pcbddc->benign_change_explicit) { 9095 benign_n = pcbddc->benign_n; 9096 } else { 9097 benign_n = 0; 9098 } 9099 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9100 We need a global reduction to avoid possible deadlocks. 9101 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9102 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9103 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9104 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 9105 need_change = (PetscBool)(!need_change); 9106 } 9107 /* If the user defines additional constraints, we import them here. 9108 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 9109 if (need_change) { 9110 PC_IS *pcisf; 9111 PC_BDDC *pcbddcf; 9112 PC pcf; 9113 9114 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 9115 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 9116 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 9117 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 9118 9119 /* hacks */ 9120 pcisf = (PC_IS*)pcf->data; 9121 pcisf->is_B_local = pcis->is_B_local; 9122 pcisf->vec1_N = pcis->vec1_N; 9123 pcisf->BtoNmap = pcis->BtoNmap; 9124 pcisf->n = pcis->n; 9125 pcisf->n_B = pcis->n_B; 9126 pcbddcf = (PC_BDDC*)pcf->data; 9127 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 9128 pcbddcf->mat_graph = pcbddc->mat_graph; 9129 pcbddcf->use_faces = PETSC_TRUE; 9130 pcbddcf->use_change_of_basis = PETSC_TRUE; 9131 pcbddcf->use_change_on_faces = PETSC_TRUE; 9132 pcbddcf->use_qr_single = PETSC_TRUE; 9133 pcbddcf->fake_change = PETSC_TRUE; 9134 9135 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 9136 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 9137 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 9138 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 9139 change = pcbddcf->ConstraintMatrix; 9140 pcbddcf->ConstraintMatrix = NULL; 9141 9142 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 9143 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 9144 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 9145 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 9146 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 9147 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 9148 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 9149 pcf->ops->destroy = NULL; 9150 pcf->ops->reset = NULL; 9151 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 9152 } 9153 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9154 9155 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 9156 if (iP) { 9157 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 9158 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 9159 ierr = PetscOptionsEnd();CHKERRQ(ierr); 9160 } 9161 if (discrete_harmonic) { 9162 Mat A; 9163 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 9164 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 9165 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 9166 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 9167 ierr = MatDestroy(&A);CHKERRQ(ierr); 9168 } else { 9169 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 9170 } 9171 ierr = MatDestroy(&change);CHKERRQ(ierr); 9172 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 9173 } 9174 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 9175 9176 /* free adjacency */ 9177 if (free_used_adj) { 9178 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 9179 } 9180 ierr = PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 9181 PetscFunctionReturn(0); 9182 } 9183 9184 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9185 { 9186 PC_IS *pcis=(PC_IS*)pc->data; 9187 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9188 PCBDDCGraph graph; 9189 PetscErrorCode ierr; 9190 9191 PetscFunctionBegin; 9192 /* attach interface graph for determining subsets */ 9193 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9194 IS verticesIS,verticescomm; 9195 PetscInt vsize,*idxs; 9196 9197 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 9198 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 9199 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 9200 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 9201 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 9202 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 9203 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 9204 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 9205 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 9206 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 9207 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 9208 } else { 9209 graph = pcbddc->mat_graph; 9210 } 9211 /* print some info */ 9212 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9213 IS vertices; 9214 PetscInt nv,nedges,nfaces; 9215 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 9216 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 9217 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 9218 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9219 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 9220 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);CHKERRQ(ierr); 9221 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);CHKERRQ(ierr); 9222 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);CHKERRQ(ierr); 9223 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 9224 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9225 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 9226 } 9227 9228 /* sub_schurs init */ 9229 if (!pcbddc->sub_schurs) { 9230 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 9231 } 9232 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 9233 9234 /* free graph struct */ 9235 if (pcbddc->sub_schurs_rebuild) { 9236 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 9237 } 9238 PetscFunctionReturn(0); 9239 } 9240 9241 PetscErrorCode PCBDDCCheckOperator(PC pc) 9242 { 9243 PC_IS *pcis=(PC_IS*)pc->data; 9244 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9245 PetscErrorCode ierr; 9246 9247 PetscFunctionBegin; 9248 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 9249 IS zerodiag = NULL; 9250 Mat S_j,B0_B=NULL; 9251 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 9252 PetscScalar *p0_check,*array,*array2; 9253 PetscReal norm; 9254 PetscInt i; 9255 9256 /* B0 and B0_B */ 9257 if (zerodiag) { 9258 IS dummy; 9259 9260 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 9261 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 9262 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 9263 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 9264 } 9265 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 9266 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 9267 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 9268 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9269 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9270 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9271 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9272 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 9273 /* S_j */ 9274 ierr = MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 9275 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 9276 9277 /* mimic vector in \widetilde{W}_\Gamma */ 9278 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 9279 /* continuous in primal space */ 9280 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 9281 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9282 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9283 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9284 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 9285 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 9286 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 9287 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9288 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 9289 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 9290 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9291 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9292 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 9293 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 9294 9295 /* assemble rhs for coarse problem */ 9296 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 9297 /* local with Schur */ 9298 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 9299 if (zerodiag) { 9300 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 9301 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 9302 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 9303 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 9304 } 9305 /* sum on primal nodes the local contributions */ 9306 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9307 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9308 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 9309 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 9310 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 9311 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 9312 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 9313 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 9314 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9315 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9316 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9317 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9318 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9319 /* scale primal nodes (BDDC sums contibutions) */ 9320 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 9321 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 9322 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9323 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 9324 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 9325 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9326 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9327 /* global: \widetilde{B0}_B w_\Gamma */ 9328 if (zerodiag) { 9329 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 9330 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 9331 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 9332 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 9333 } 9334 /* BDDC */ 9335 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 9336 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 9337 9338 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 9339 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 9340 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 9341 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);CHKERRQ(ierr); 9342 for (i=0;i<pcbddc->benign_n;i++) { 9343 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));CHKERRQ(ierr); 9344 } 9345 ierr = PetscFree(p0_check);CHKERRQ(ierr); 9346 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 9347 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 9348 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 9349 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 9350 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 9351 } 9352 PetscFunctionReturn(0); 9353 } 9354 9355 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9356 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9357 { 9358 Mat At; 9359 IS rows; 9360 PetscInt rst,ren; 9361 PetscErrorCode ierr; 9362 PetscLayout rmap; 9363 9364 PetscFunctionBegin; 9365 rst = ren = 0; 9366 if (ccomm != MPI_COMM_NULL) { 9367 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 9368 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 9369 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 9370 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 9371 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 9372 } 9373 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 9374 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 9375 ierr = ISDestroy(&rows);CHKERRQ(ierr); 9376 9377 if (ccomm != MPI_COMM_NULL) { 9378 Mat_MPIAIJ *a,*b; 9379 IS from,to; 9380 Vec gvec; 9381 PetscInt lsize; 9382 9383 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 9384 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 9385 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 9386 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 9387 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 9388 a = (Mat_MPIAIJ*)At->data; 9389 b = (Mat_MPIAIJ*)(*B)->data; 9390 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 9391 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 9392 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 9393 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 9394 b->A = a->A; 9395 b->B = a->B; 9396 9397 b->donotstash = a->donotstash; 9398 b->roworiented = a->roworiented; 9399 b->rowindices = 0; 9400 b->rowvalues = 0; 9401 b->getrowactive = PETSC_FALSE; 9402 9403 (*B)->rmap = rmap; 9404 (*B)->factortype = A->factortype; 9405 (*B)->assembled = PETSC_TRUE; 9406 (*B)->insertmode = NOT_SET_VALUES; 9407 (*B)->preallocated = PETSC_TRUE; 9408 9409 if (a->colmap) { 9410 #if defined(PETSC_USE_CTABLE) 9411 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 9412 #else 9413 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 9414 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9415 ierr = PetscArraycpy(b->colmap,a->colmap,At->cmap->N);CHKERRQ(ierr); 9416 #endif 9417 } else b->colmap = 0; 9418 if (a->garray) { 9419 PetscInt len; 9420 len = a->B->cmap->n; 9421 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 9422 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 9423 if (len) { ierr = PetscArraycpy(b->garray,a->garray,len);CHKERRQ(ierr); } 9424 } else b->garray = 0; 9425 9426 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 9427 b->lvec = a->lvec; 9428 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 9429 9430 /* cannot use VecScatterCopy */ 9431 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 9432 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 9433 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 9434 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 9435 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 9436 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 9437 ierr = ISDestroy(&from);CHKERRQ(ierr); 9438 ierr = ISDestroy(&to);CHKERRQ(ierr); 9439 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 9440 } 9441 ierr = MatDestroy(&At);CHKERRQ(ierr); 9442 PetscFunctionReturn(0); 9443 } 9444