1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 PetscScalar *uwork,*data,*U, ds = 0.; 18 PetscReal *sing; 19 PetscBLASInt bM,bN,lwork,lierr,di = 1; 20 PetscInt ulw,i,nr,nc,n; 21 PetscErrorCode ierr; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork2; 24 #endif 25 26 PetscFunctionBegin; 27 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 28 if (!nr || !nc) PetscFunctionReturn(0); 29 30 /* workspace */ 31 if (!work) { 32 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 33 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 34 } else { 35 ulw = lw; 36 uwork = work; 37 } 38 n = PetscMin(nr,nc); 39 if (!rwork) { 40 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 41 } else { 42 sing = rwork; 43 } 44 45 /* SVD */ 46 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 50 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 51 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 52 #if !defined(PETSC_USE_COMPLEX) 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 #else 55 ierr = PetscMalloc1(5*n,&rwork2);CHKERRQ(ierr); 56 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,rwork2,&lierr)); 57 ierr = PetscFree(rwork2);CHKERRQ(ierr); 58 #endif 59 ierr = PetscFPTrapPop();CHKERRQ(ierr); 60 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 61 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 62 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 63 if (!rwork) { 64 ierr = PetscFree(sing);CHKERRQ(ierr); 65 } 66 if (!work) { 67 ierr = PetscFree(uwork);CHKERRQ(ierr); 68 } 69 /* create B */ 70 if (!range) { 71 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 72 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 73 ierr = PetscArraycpy(data,U+nr*i,(nr-i)*nr);CHKERRQ(ierr); 74 } else { 75 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 76 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 77 ierr = PetscArraycpy(data,U,i*nr);CHKERRQ(ierr); 78 } 79 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 80 ierr = PetscFree(U);CHKERRQ(ierr); 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 const PetscScalar *vals; 122 PetscScalar v; 123 124 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 125 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 126 ierr = MatDenseGetArrayRead(GEd,&vals);CHKERRQ(ierr); 127 /* v = PetscAbsScalar(vals[0]) */; 128 v = 1.; 129 cvals[0] = vals[0]/v; 130 cvals[1] = vals[1]/v; 131 ierr = MatDenseRestoreArrayRead(GEd,&vals);CHKERRQ(ierr); 132 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 133 #if defined(PRINT_GDET) 134 { 135 PetscViewer viewer; 136 char filename[256]; 137 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 138 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 139 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 140 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 141 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 142 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 143 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 144 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 145 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 146 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 147 } 148 #endif 149 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 150 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 151 } 152 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode PCBDDCNedelecSupport(PC pc) 157 { 158 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 159 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 160 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 161 Vec tvec; 162 PetscSF sfv; 163 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 164 MPI_Comm comm; 165 IS lned,primals,allprimals,nedfieldlocal; 166 IS *eedges,*extrows,*extcols,*alleedges; 167 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 168 PetscScalar *vals,*work; 169 PetscReal *rwork; 170 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 171 PetscInt ne,nv,Lv,order,n,field; 172 PetscInt n_neigh,*neigh,*n_shared,**shared; 173 PetscInt i,j,extmem,cum,maxsize,nee; 174 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 175 PetscInt *sfvleaves,*sfvroots; 176 PetscInt *corners,*cedges; 177 PetscInt *ecount,**eneighs,*vcount,**vneighs; 178 PetscInt *emarks; 179 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 180 PetscErrorCode ierr; 181 182 PetscFunctionBegin; 183 /* If the discrete gradient is defined for a subset of dofs and global is true, 184 it assumes G is given in global ordering for all the dofs. 185 Otherwise, the ordering is global for the Nedelec field */ 186 order = pcbddc->nedorder; 187 conforming = pcbddc->conforming; 188 field = pcbddc->nedfield; 189 global = pcbddc->nedglobal; 190 setprimal = PETSC_FALSE; 191 print = PETSC_FALSE; 192 singular = PETSC_FALSE; 193 194 /* Command line customization */ 195 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 196 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 199 /* print debug info TODO: to be removed */ 200 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 201 ierr = PetscOptionsEnd();CHKERRQ(ierr); 202 203 /* Return if there are no edges in the decomposition and the problem is not singular */ 204 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 205 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 206 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 207 if (!singular) { 208 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 209 lrc[0] = PETSC_FALSE; 210 for (i=0;i<n;i++) { 211 if (PetscRealPart(vals[i]) > 2.) { 212 lrc[0] = PETSC_TRUE; 213 break; 214 } 215 } 216 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 217 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 218 if (!lrc[1]) PetscFunctionReturn(0); 219 } 220 221 /* Get Nedelec field */ 222 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal); 223 if (pcbddc->n_ISForDofsLocal && field >= 0) { 224 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 225 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 226 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 227 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 228 ne = n; 229 nedfieldlocal = NULL; 230 global = PETSC_TRUE; 231 } else if (field == PETSC_DECIDE) { 232 PetscInt rst,ren,*idx; 233 234 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 235 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 236 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 237 for (i=rst;i<ren;i++) { 238 PetscInt nc; 239 240 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 241 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 242 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 243 } 244 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 245 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 246 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 247 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 248 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 249 } else { 250 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 251 } 252 253 /* Sanity checks */ 254 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 255 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 256 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order); 257 258 /* Just set primal dofs and return */ 259 if (setprimal) { 260 IS enedfieldlocal; 261 PetscInt *eidxs; 262 263 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 264 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 265 if (nedfieldlocal) { 266 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 267 for (i=0,cum=0;i<ne;i++) { 268 if (PetscRealPart(vals[idxs[i]]) > 2.) { 269 eidxs[cum++] = idxs[i]; 270 } 271 } 272 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 273 } else { 274 for (i=0,cum=0;i<ne;i++) { 275 if (PetscRealPart(vals[i]) > 2.) { 276 eidxs[cum++] = i; 277 } 278 } 279 } 280 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 281 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 282 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 283 ierr = PetscFree(eidxs);CHKERRQ(ierr); 284 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 285 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 286 PetscFunctionReturn(0); 287 } 288 289 /* Compute some l2g maps */ 290 if (nedfieldlocal) { 291 IS is; 292 293 /* need to map from the local Nedelec field to local numbering */ 294 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 295 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 296 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 297 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 298 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 299 if (global) { 300 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 301 el2g = al2g; 302 } else { 303 IS gis; 304 305 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 306 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 307 ierr = ISDestroy(&gis);CHKERRQ(ierr); 308 } 309 ierr = ISDestroy(&is);CHKERRQ(ierr); 310 } else { 311 /* restore default */ 312 pcbddc->nedfield = -1; 313 /* one ref for the destruction of al2g, one for el2g */ 314 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 315 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 316 el2g = al2g; 317 fl2g = NULL; 318 } 319 320 /* Start communication to drop connections for interior edges (for cc analysis only) */ 321 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 322 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 323 if (nedfieldlocal) { 324 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 325 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 326 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 327 } else { 328 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 329 } 330 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 331 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 332 333 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 334 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 335 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 336 if (global) { 337 PetscInt rst; 338 339 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 340 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 341 if (matis->sf_rootdata[i] < 2) { 342 matis->sf_rootdata[cum++] = i + rst; 343 } 344 } 345 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 346 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 347 } else { 348 PetscInt *tbz; 349 350 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 351 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 352 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 353 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 354 for (i=0,cum=0;i<ne;i++) 355 if (matis->sf_leafdata[idxs[i]] == 1) 356 tbz[cum++] = i; 357 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 358 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 359 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 360 ierr = PetscFree(tbz);CHKERRQ(ierr); 361 } 362 } else { /* we need the entire G to infer the nullspace */ 363 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 364 G = pcbddc->discretegradient; 365 } 366 367 /* Extract subdomain relevant rows of G */ 368 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 369 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 370 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 371 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 372 ierr = ISDestroy(&lned);CHKERRQ(ierr); 373 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 374 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 375 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 376 377 /* SF for nodal dofs communications */ 378 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 379 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 380 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 381 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 382 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 384 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 386 i = singular ? 2 : 1; 387 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 388 389 /* Destroy temporary G created in MATIS format and modified G */ 390 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 391 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 392 ierr = MatDestroy(&G);CHKERRQ(ierr); 393 394 if (print) { 395 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 396 ierr = MatView(lG,NULL);CHKERRQ(ierr); 397 } 398 399 /* Save lG for values insertion in change of basis */ 400 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 401 402 /* Analyze the edge-nodes connections (duplicate lG) */ 403 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 404 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 405 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 406 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 407 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 409 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 410 /* need to import the boundary specification to ensure the 411 proper detection of coarse edges' endpoints */ 412 if (pcbddc->DirichletBoundariesLocal) { 413 IS is; 414 415 if (fl2g) { 416 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 417 } else { 418 is = pcbddc->DirichletBoundariesLocal; 419 } 420 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 421 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 422 for (i=0;i<cum;i++) { 423 if (idxs[i] >= 0) { 424 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 425 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 426 } 427 } 428 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 429 if (fl2g) { 430 ierr = ISDestroy(&is);CHKERRQ(ierr); 431 } 432 } 433 if (pcbddc->NeumannBoundariesLocal) { 434 IS is; 435 436 if (fl2g) { 437 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 438 } else { 439 is = pcbddc->NeumannBoundariesLocal; 440 } 441 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 442 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 443 for (i=0;i<cum;i++) { 444 if (idxs[i] >= 0) { 445 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 446 } 447 } 448 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 449 if (fl2g) { 450 ierr = ISDestroy(&is);CHKERRQ(ierr); 451 } 452 } 453 454 /* Count neighs per dof */ 455 ierr = ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 456 ierr = ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 457 458 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 459 for proper detection of coarse edges' endpoints */ 460 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 461 for (i=0;i<ne;i++) { 462 if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) { 463 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 464 } 465 } 466 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 467 if (!conforming) { 468 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 469 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 470 } 471 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 472 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 473 cum = 0; 474 for (i=0;i<ne;i++) { 475 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 476 if (!PetscBTLookup(btee,i)) { 477 marks[cum++] = i; 478 continue; 479 } 480 /* set badly connected edge dofs as primal */ 481 if (!conforming) { 482 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 483 marks[cum++] = i; 484 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 485 for (j=ii[i];j<ii[i+1];j++) { 486 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 487 } 488 } else { 489 /* every edge dofs should be connected trough a certain number of nodal dofs 490 to other edge dofs belonging to coarse edges 491 - at most 2 endpoints 492 - order-1 interior nodal dofs 493 - no undefined nodal dofs (nconn < order) 494 */ 495 PetscInt ends = 0,ints = 0, undef = 0; 496 for (j=ii[i];j<ii[i+1];j++) { 497 PetscInt v = jj[j],k; 498 PetscInt nconn = iit[v+1]-iit[v]; 499 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 500 if (nconn > order) ends++; 501 else if (nconn == order) ints++; 502 else undef++; 503 } 504 if (undef || ends > 2 || ints != order -1) { 505 marks[cum++] = i; 506 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 507 for (j=ii[i];j<ii[i+1];j++) { 508 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 509 } 510 } 511 } 512 } 513 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 514 if (!order && ii[i+1] != ii[i]) { 515 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 516 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 517 } 518 } 519 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 520 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 521 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 522 if (!conforming) { 523 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 524 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 525 } 526 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 527 528 /* identify splitpoints and corner candidates */ 529 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 530 if (print) { 531 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 532 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 533 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 534 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 535 } 536 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 537 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 538 for (i=0;i<nv;i++) { 539 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 540 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 541 if (!order) { /* variable order */ 542 PetscReal vorder = 0.; 543 544 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 545 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 546 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test); 547 ord = 1; 548 } 549 if (PetscUnlikelyDebug(test%ord)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord); 550 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 551 if (PetscBTLookup(btbd,jj[j])) { 552 bdir = PETSC_TRUE; 553 break; 554 } 555 if (vc != ecount[jj[j]]) { 556 sneighs = PETSC_FALSE; 557 } else { 558 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 559 for (k=0;k<vc;k++) { 560 if (vn[k] != en[k]) { 561 sneighs = PETSC_FALSE; 562 break; 563 } 564 } 565 } 566 } 567 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 568 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir); 569 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 570 } else if (test == ord) { 571 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 572 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i); 573 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 574 } else { 575 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i); 576 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 577 } 578 } 579 } 580 ierr = ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 581 ierr = ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 582 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 583 584 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 585 if (order != 1) { 586 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 587 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 588 for (i=0;i<nv;i++) { 589 if (PetscBTLookup(btvcand,i)) { 590 PetscBool found = PETSC_FALSE; 591 for (j=ii[i];j<ii[i+1] && !found;j++) { 592 PetscInt k,e = jj[j]; 593 if (PetscBTLookup(bte,e)) continue; 594 for (k=iit[e];k<iit[e+1];k++) { 595 PetscInt v = jjt[k]; 596 if (v != i && PetscBTLookup(btvcand,v)) { 597 found = PETSC_TRUE; 598 break; 599 } 600 } 601 } 602 if (!found) { 603 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i); 604 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 605 } else { 606 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i); 607 } 608 } 609 } 610 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 611 } 612 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 613 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 614 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 615 616 /* Get the local G^T explicitly */ 617 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 618 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 619 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 620 621 /* Mark interior nodal dofs */ 622 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 623 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 624 for (i=1;i<n_neigh;i++) { 625 for (j=0;j<n_shared[i];j++) { 626 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 627 } 628 } 629 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 630 631 /* communicate corners and splitpoints */ 632 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 633 ierr = PetscArrayzero(sfvleaves,nv);CHKERRQ(ierr); 634 ierr = PetscArrayzero(sfvroots,Lv);CHKERRQ(ierr); 635 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 636 637 if (print) { 638 IS tbz; 639 640 cum = 0; 641 for (i=0;i<nv;i++) 642 if (sfvleaves[i]) 643 vmarks[cum++] = i; 644 645 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 646 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 647 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 648 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 649 } 650 651 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 652 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 653 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 654 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 655 656 /* Zero rows of lGt corresponding to identified corners 657 and interior nodal dofs */ 658 cum = 0; 659 for (i=0;i<nv;i++) { 660 if (sfvleaves[i]) { 661 vmarks[cum++] = i; 662 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 663 } 664 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 665 } 666 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 667 if (print) { 668 IS tbz; 669 670 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 671 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 672 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 673 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 674 } 675 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 676 ierr = PetscFree(vmarks);CHKERRQ(ierr); 677 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 678 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 679 680 /* Recompute G */ 681 ierr = MatDestroy(&lG);CHKERRQ(ierr); 682 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 683 if (print) { 684 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 685 ierr = MatView(lG,NULL);CHKERRQ(ierr); 686 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 687 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 688 } 689 690 /* Get primal dofs (if any) */ 691 cum = 0; 692 for (i=0;i<ne;i++) { 693 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 694 } 695 if (fl2g) { 696 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 697 } 698 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 699 if (print) { 700 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 701 ierr = ISView(primals,NULL);CHKERRQ(ierr); 702 } 703 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 704 /* TODO: what if the user passed in some of them ? */ 705 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 706 ierr = ISDestroy(&primals);CHKERRQ(ierr); 707 708 /* Compute edge connectivity */ 709 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 710 711 /* Symbolic conn = lG*lGt */ 712 ierr = MatProductCreate(lG,lGt,NULL,&conn);CHKERRQ(ierr); 713 ierr = MatProductSetType(conn,MATPRODUCT_AB);CHKERRQ(ierr); 714 ierr = MatProductSetAlgorithm(conn,"default");CHKERRQ(ierr); 715 ierr = MatProductSetFill(conn,PETSC_DEFAULT);CHKERRQ(ierr); 716 ierr = PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_");CHKERRQ(ierr); 717 ierr = MatProductSetFromOptions(conn);CHKERRQ(ierr); 718 ierr = MatProductSymbolic(conn);CHKERRQ(ierr); 719 720 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 721 if (fl2g) { 722 PetscBT btf; 723 PetscInt *iia,*jja,*iiu,*jju; 724 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 725 726 /* create CSR for all local dofs */ 727 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 728 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 729 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n); 730 iiu = pcbddc->mat_graph->xadj; 731 jju = pcbddc->mat_graph->adjncy; 732 } else if (pcbddc->use_local_adj) { 733 rest = PETSC_TRUE; 734 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 735 } else { 736 free = PETSC_TRUE; 737 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 738 iiu[0] = 0; 739 for (i=0;i<n;i++) { 740 iiu[i+1] = i+1; 741 jju[i] = -1; 742 } 743 } 744 745 /* import sizes of CSR */ 746 iia[0] = 0; 747 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 748 749 /* overwrite entries corresponding to the Nedelec field */ 750 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 751 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 752 for (i=0;i<ne;i++) { 753 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 754 iia[idxs[i]+1] = ii[i+1]-ii[i]; 755 } 756 757 /* iia in CSR */ 758 for (i=0;i<n;i++) iia[i+1] += iia[i]; 759 760 /* jja in CSR */ 761 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 762 for (i=0;i<n;i++) 763 if (!PetscBTLookup(btf,i)) 764 for (j=0;j<iiu[i+1]-iiu[i];j++) 765 jja[iia[i]+j] = jju[iiu[i]+j]; 766 767 /* map edge dofs connectivity */ 768 if (jj) { 769 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 770 for (i=0;i<ne;i++) { 771 PetscInt e = idxs[i]; 772 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 773 } 774 } 775 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 776 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 777 if (rest) { 778 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 779 } 780 if (free) { 781 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 782 } 783 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 784 } else { 785 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 786 } 787 788 /* Analyze interface for edge dofs */ 789 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 790 pcbddc->mat_graph->twodim = PETSC_FALSE; 791 792 /* Get coarse edges in the edge space */ 793 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 794 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 795 796 if (fl2g) { 797 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 798 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 799 for (i=0;i<nee;i++) { 800 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 801 } 802 } else { 803 eedges = alleedges; 804 primals = allprimals; 805 } 806 807 /* Mark fine edge dofs with their coarse edge id */ 808 ierr = PetscArrayzero(marks,ne);CHKERRQ(ierr); 809 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 810 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 811 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 812 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 813 if (print) { 814 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 815 ierr = ISView(primals,NULL);CHKERRQ(ierr); 816 } 817 818 maxsize = 0; 819 for (i=0;i<nee;i++) { 820 PetscInt size,mark = i+1; 821 822 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 823 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 824 for (j=0;j<size;j++) marks[idxs[j]] = mark; 825 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 826 maxsize = PetscMax(maxsize,size); 827 } 828 829 /* Find coarse edge endpoints */ 830 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 831 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 832 for (i=0;i<nee;i++) { 833 PetscInt mark = i+1,size; 834 835 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 836 if (!size && nedfieldlocal) continue; 837 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 838 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 839 if (print) { 840 ierr = PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);CHKERRQ(ierr); 841 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 842 } 843 for (j=0;j<size;j++) { 844 PetscInt k, ee = idxs[j]; 845 if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee); 846 for (k=ii[ee];k<ii[ee+1];k++) { 847 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]); 848 if (PetscBTLookup(btv,jj[k])) { 849 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]); 850 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 851 PetscInt k2; 852 PetscBool corner = PETSC_FALSE; 853 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 854 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 855 /* it's a corner if either is connected with an edge dof belonging to a different cc or 856 if the edge dof lie on the natural part of the boundary */ 857 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 858 corner = PETSC_TRUE; 859 break; 860 } 861 } 862 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 863 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]); 864 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 865 } else { 866 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 867 } 868 } 869 } 870 } 871 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 872 } 873 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 874 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 875 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 876 877 /* Reset marked primal dofs */ 878 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 879 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 880 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 881 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 882 883 /* Now use the initial lG */ 884 ierr = MatDestroy(&lG);CHKERRQ(ierr); 885 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 886 lG = lGinit; 887 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 888 889 /* Compute extended cols indices */ 890 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 891 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 892 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 893 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 894 i *= maxsize; 895 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 896 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 897 eerr = PETSC_FALSE; 898 for (i=0;i<nee;i++) { 899 PetscInt size,found = 0; 900 901 cum = 0; 902 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 903 if (!size && nedfieldlocal) continue; 904 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 905 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 906 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 907 for (j=0;j<size;j++) { 908 PetscInt k,ee = idxs[j]; 909 for (k=ii[ee];k<ii[ee+1];k++) { 910 PetscInt vv = jj[k]; 911 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 912 else if (!PetscBTLookupSet(btvc,vv)) found++; 913 } 914 } 915 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 916 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 917 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 918 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 919 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 920 /* it may happen that endpoints are not defined at this point 921 if it is the case, mark this edge for a second pass */ 922 if (cum != size -1 || found != 2) { 923 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 924 if (print) { 925 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 926 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 927 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 928 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 929 } 930 eerr = PETSC_TRUE; 931 } 932 } 933 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 934 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 935 if (done) { 936 PetscInt *newprimals; 937 938 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 939 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 940 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 941 ierr = PetscArraycpy(newprimals,idxs,cum);CHKERRQ(ierr); 942 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 943 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 944 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr); 945 for (i=0;i<nee;i++) { 946 PetscBool has_candidates = PETSC_FALSE; 947 if (PetscBTLookup(bter,i)) { 948 PetscInt size,mark = i+1; 949 950 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 951 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 952 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 953 for (j=0;j<size;j++) { 954 PetscInt k,ee = idxs[j]; 955 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]); 956 for (k=ii[ee];k<ii[ee+1];k++) { 957 /* set all candidates located on the edge as corners */ 958 if (PetscBTLookup(btvcand,jj[k])) { 959 PetscInt k2,vv = jj[k]; 960 has_candidates = PETSC_TRUE; 961 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv); 962 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 963 /* set all edge dofs connected to candidate as primals */ 964 for (k2=iit[vv];k2<iit[vv+1];k2++) { 965 if (marks[jjt[k2]] == mark) { 966 PetscInt k3,ee2 = jjt[k2]; 967 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2); 968 newprimals[cum++] = ee2; 969 /* finally set the new corners */ 970 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 971 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]); 972 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 973 } 974 } 975 } 976 } else { 977 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]); 978 } 979 } 980 } 981 if (!has_candidates) { /* circular edge */ 982 PetscInt k, ee = idxs[0],*tmarks; 983 984 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 985 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i); 986 for (k=ii[ee];k<ii[ee+1];k++) { 987 PetscInt k2; 988 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]); 989 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 990 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 991 } 992 for (j=0;j<size;j++) { 993 if (tmarks[idxs[j]] > 1) { 994 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]); 995 newprimals[cum++] = idxs[j]; 996 } 997 } 998 ierr = PetscFree(tmarks);CHKERRQ(ierr); 999 } 1000 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1001 } 1002 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1003 } 1004 ierr = PetscFree(extcols);CHKERRQ(ierr); 1005 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1006 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1007 if (fl2g) { 1008 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1009 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1010 for (i=0;i<nee;i++) { 1011 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1012 } 1013 ierr = PetscFree(eedges);CHKERRQ(ierr); 1014 } 1015 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1016 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1017 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1018 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1019 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1020 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1021 pcbddc->mat_graph->twodim = PETSC_FALSE; 1022 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1023 if (fl2g) { 1024 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1025 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1026 for (i=0;i<nee;i++) { 1027 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1028 } 1029 } else { 1030 eedges = alleedges; 1031 primals = allprimals; 1032 } 1033 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1034 1035 /* Mark again */ 1036 ierr = PetscArrayzero(marks,ne);CHKERRQ(ierr); 1037 for (i=0;i<nee;i++) { 1038 PetscInt size,mark = i+1; 1039 1040 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1041 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1042 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1043 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1044 } 1045 if (print) { 1046 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1047 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1048 } 1049 1050 /* Recompute extended cols */ 1051 eerr = PETSC_FALSE; 1052 for (i=0;i<nee;i++) { 1053 PetscInt size; 1054 1055 cum = 0; 1056 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1057 if (!size && nedfieldlocal) continue; 1058 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1059 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1060 for (j=0;j<size;j++) { 1061 PetscInt k,ee = idxs[j]; 1062 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1063 } 1064 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1065 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1066 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1067 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1068 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1069 if (cum != size -1) { 1070 if (print) { 1071 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1072 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1073 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1074 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1075 } 1076 eerr = PETSC_TRUE; 1077 } 1078 } 1079 } 1080 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1081 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1082 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1083 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1084 /* an error should not occur at this point */ 1085 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1086 1087 /* Check the number of endpoints */ 1088 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1089 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1090 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1091 for (i=0;i<nee;i++) { 1092 PetscInt size, found = 0, gc[2]; 1093 1094 /* init with defaults */ 1095 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1096 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1097 if (!size && nedfieldlocal) continue; 1098 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1099 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1100 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1101 for (j=0;j<size;j++) { 1102 PetscInt k,ee = idxs[j]; 1103 for (k=ii[ee];k<ii[ee+1];k++) { 1104 PetscInt vv = jj[k]; 1105 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1106 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i); 1107 corners[i*2+found++] = vv; 1108 } 1109 } 1110 } 1111 if (found != 2) { 1112 PetscInt e; 1113 if (fl2g) { 1114 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1115 } else { 1116 e = idxs[0]; 1117 } 1118 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]); 1119 } 1120 1121 /* get primal dof index on this coarse edge */ 1122 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1123 if (gc[0] > gc[1]) { 1124 PetscInt swap = corners[2*i]; 1125 corners[2*i] = corners[2*i+1]; 1126 corners[2*i+1] = swap; 1127 } 1128 cedges[i] = idxs[size-1]; 1129 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1130 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1131 } 1132 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1133 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1134 1135 if (PetscDefined(USE_DEBUG)) { 1136 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1137 not interfere with neighbouring coarse edges */ 1138 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1139 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1140 for (i=0;i<nv;i++) { 1141 PetscInt emax = 0,eemax = 0; 1142 1143 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1144 ierr = PetscArrayzero(emarks,nee+1);CHKERRQ(ierr); 1145 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1146 for (j=1;j<nee+1;j++) { 1147 if (emax < emarks[j]) { 1148 emax = emarks[j]; 1149 eemax = j; 1150 } 1151 } 1152 /* not relevant for edges */ 1153 if (!eemax) continue; 1154 1155 for (j=ii[i];j<ii[i+1];j++) { 1156 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1157 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]); 1158 } 1159 } 1160 } 1161 ierr = PetscFree(emarks);CHKERRQ(ierr); 1162 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1163 } 1164 1165 /* Compute extended rows indices for edge blocks of the change of basis */ 1166 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1167 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1168 extmem *= maxsize; 1169 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1170 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1171 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1172 for (i=0;i<nv;i++) { 1173 PetscInt mark = 0,size,start; 1174 1175 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1176 for (j=ii[i];j<ii[i+1];j++) 1177 if (marks[jj[j]] && !mark) 1178 mark = marks[jj[j]]; 1179 1180 /* not relevant */ 1181 if (!mark) continue; 1182 1183 /* import extended row */ 1184 mark--; 1185 start = mark*extmem+extrowcum[mark]; 1186 size = ii[i+1]-ii[i]; 1187 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem); 1188 ierr = PetscArraycpy(extrow+start,jj+ii[i],size);CHKERRQ(ierr); 1189 extrowcum[mark] += size; 1190 } 1191 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1192 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1193 ierr = PetscFree(marks);CHKERRQ(ierr); 1194 1195 /* Compress extrows */ 1196 cum = 0; 1197 for (i=0;i<nee;i++) { 1198 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1199 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1200 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1201 cum = PetscMax(cum,size); 1202 } 1203 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1204 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1205 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1206 1207 /* Workspace for lapack inner calls and VecSetValues */ 1208 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1209 1210 /* Create change of basis matrix (preallocation can be improved) */ 1211 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1212 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1213 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1214 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1215 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1216 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1217 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1218 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1219 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1220 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1221 1222 /* Defaults to identity */ 1223 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1224 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1225 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1226 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1227 1228 /* Create discrete gradient for the coarser level if needed */ 1229 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1230 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1231 if (pcbddc->current_level < pcbddc->max_levels) { 1232 ISLocalToGlobalMapping cel2g,cvl2g; 1233 IS wis,gwis; 1234 PetscInt cnv,cne; 1235 1236 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1237 if (fl2g) { 1238 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1239 } else { 1240 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1241 pcbddc->nedclocal = wis; 1242 } 1243 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1244 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1245 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1246 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1247 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1248 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1249 1250 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1251 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1252 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1253 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1254 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1255 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1256 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1257 1258 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1259 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1260 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1261 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1262 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1263 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1264 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1265 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1266 } 1267 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1268 1269 #if defined(PRINT_GDET) 1270 inc = 0; 1271 lev = pcbddc->current_level; 1272 #endif 1273 1274 /* Insert values in the change of basis matrix */ 1275 for (i=0;i<nee;i++) { 1276 Mat Gins = NULL, GKins = NULL; 1277 IS cornersis = NULL; 1278 PetscScalar cvals[2]; 1279 1280 if (pcbddc->nedcG) { 1281 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1282 } 1283 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1284 if (Gins && GKins) { 1285 const PetscScalar *data; 1286 const PetscInt *rows,*cols; 1287 PetscInt nrh,nch,nrc,ncc; 1288 1289 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1290 /* H1 */ 1291 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1292 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1293 ierr = MatDenseGetArrayRead(Gins,&data);CHKERRQ(ierr); 1294 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1295 ierr = MatDenseRestoreArrayRead(Gins,&data);CHKERRQ(ierr); 1296 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1297 /* complement */ 1298 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1299 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i); 1300 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i); 1301 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc); 1302 ierr = MatDenseGetArrayRead(GKins,&data);CHKERRQ(ierr); 1303 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1304 ierr = MatDenseRestoreArrayRead(GKins,&data);CHKERRQ(ierr); 1305 1306 /* coarse discrete gradient */ 1307 if (pcbddc->nedcG) { 1308 PetscInt cols[2]; 1309 1310 cols[0] = 2*i; 1311 cols[1] = 2*i+1; 1312 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1313 } 1314 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1315 } 1316 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1317 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1318 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1319 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1320 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1321 } 1322 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1323 1324 /* Start assembling */ 1325 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1326 if (pcbddc->nedcG) { 1327 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1328 } 1329 1330 /* Free */ 1331 if (fl2g) { 1332 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1333 for (i=0;i<nee;i++) { 1334 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1335 } 1336 ierr = PetscFree(eedges);CHKERRQ(ierr); 1337 } 1338 1339 /* hack mat_graph with primal dofs on the coarse edges */ 1340 { 1341 PCBDDCGraph graph = pcbddc->mat_graph; 1342 PetscInt *oqueue = graph->queue; 1343 PetscInt *ocptr = graph->cptr; 1344 PetscInt ncc,*idxs; 1345 1346 /* find first primal edge */ 1347 if (pcbddc->nedclocal) { 1348 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1349 } else { 1350 if (fl2g) { 1351 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1352 } 1353 idxs = cedges; 1354 } 1355 cum = 0; 1356 while (cum < nee && cedges[cum] < 0) cum++; 1357 1358 /* adapt connected components */ 1359 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1360 graph->cptr[0] = 0; 1361 for (i=0,ncc=0;i<graph->ncc;i++) { 1362 PetscInt lc = ocptr[i+1]-ocptr[i]; 1363 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1364 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1365 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1366 ncc++; 1367 lc--; 1368 cum++; 1369 while (cum < nee && cedges[cum] < 0) cum++; 1370 } 1371 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1372 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1373 ncc++; 1374 } 1375 graph->ncc = ncc; 1376 if (pcbddc->nedclocal) { 1377 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1378 } 1379 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1380 } 1381 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1382 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1383 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1384 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1385 1386 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1387 ierr = PetscFree(extrow);CHKERRQ(ierr); 1388 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1389 ierr = PetscFree(corners);CHKERRQ(ierr); 1390 ierr = PetscFree(cedges);CHKERRQ(ierr); 1391 ierr = PetscFree(extrows);CHKERRQ(ierr); 1392 ierr = PetscFree(extcols);CHKERRQ(ierr); 1393 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1394 1395 /* Complete assembling */ 1396 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1397 if (pcbddc->nedcG) { 1398 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1399 #if 0 1400 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1401 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1402 #endif 1403 } 1404 1405 /* set change of basis */ 1406 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1407 ierr = MatDestroy(&T);CHKERRQ(ierr); 1408 1409 PetscFunctionReturn(0); 1410 } 1411 1412 /* the near-null space of BDDC carries information on quadrature weights, 1413 and these can be collinear -> so cheat with MatNullSpaceCreate 1414 and create a suitable set of basis vectors first */ 1415 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1416 { 1417 PetscErrorCode ierr; 1418 PetscInt i; 1419 1420 PetscFunctionBegin; 1421 for (i=0;i<nvecs;i++) { 1422 PetscInt first,last; 1423 1424 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1425 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1426 if (i>=first && i < last) { 1427 PetscScalar *data; 1428 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1429 if (!has_const) { 1430 data[i-first] = 1.; 1431 } else { 1432 data[2*i-first] = 1./PetscSqrtReal(2.); 1433 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1434 } 1435 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1436 } 1437 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1438 } 1439 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1440 for (i=0;i<nvecs;i++) { /* reset vectors */ 1441 PetscInt first,last; 1442 ierr = VecLockReadPop(quad_vecs[i]);CHKERRQ(ierr); 1443 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1444 if (i>=first && i < last) { 1445 PetscScalar *data; 1446 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1447 if (!has_const) { 1448 data[i-first] = 0.; 1449 } else { 1450 data[2*i-first] = 0.; 1451 data[2*i-first+1] = 0.; 1452 } 1453 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1454 } 1455 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1456 ierr = VecLockReadPush(quad_vecs[i]);CHKERRQ(ierr); 1457 } 1458 PetscFunctionReturn(0); 1459 } 1460 1461 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1462 { 1463 Mat loc_divudotp; 1464 Vec p,v,vins,quad_vec,*quad_vecs; 1465 ISLocalToGlobalMapping map; 1466 PetscScalar *vals; 1467 const PetscScalar *array; 1468 PetscInt i,maxneighs = 0,maxsize,*gidxs; 1469 PetscInt n_neigh,*neigh,*n_shared,**shared; 1470 PetscMPIInt rank; 1471 PetscErrorCode ierr; 1472 1473 PetscFunctionBegin; 1474 ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1475 for (i=0;i<n_neigh;i++) maxneighs = PetscMax(graph->count[shared[i][0]]+1,maxneighs); 1476 ierr = MPIU_Allreduce(MPI_IN_PLACE,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1477 if (!maxneighs) { 1478 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1479 *nnsp = NULL; 1480 PetscFunctionReturn(0); 1481 } 1482 maxsize = 0; 1483 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1484 ierr = PetscMalloc2(maxsize,&gidxs,maxsize,&vals);CHKERRQ(ierr); 1485 /* create vectors to hold quadrature weights */ 1486 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1487 if (!transpose) { 1488 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1489 } else { 1490 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1491 } 1492 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1493 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1494 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1495 for (i=0;i<maxneighs;i++) { 1496 ierr = VecLockReadPop(quad_vecs[i]);CHKERRQ(ierr); 1497 } 1498 1499 /* compute local quad vec */ 1500 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1501 if (!transpose) { 1502 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1503 } else { 1504 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1505 } 1506 ierr = VecSet(p,1.);CHKERRQ(ierr); 1507 if (!transpose) { 1508 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1509 } else { 1510 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1511 } 1512 if (vl2l) { 1513 Mat lA; 1514 VecScatter sc; 1515 1516 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1517 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1518 ierr = VecScatterCreate(v,NULL,vins,vl2l,&sc);CHKERRQ(ierr); 1519 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1520 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1521 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1522 } else { 1523 vins = v; 1524 } 1525 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1526 ierr = VecDestroy(&p);CHKERRQ(ierr); 1527 1528 /* insert in global quadrature vecs */ 1529 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1530 for (i=1;i<n_neigh;i++) { 1531 const PetscInt *idxs; 1532 PetscInt idx,nn,j; 1533 1534 idxs = shared[i]; 1535 nn = n_shared[i]; 1536 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1537 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1538 idx = -(idx+1); 1539 if (idx < 0 || idx >= maxneighs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid index %D not in [0,%D)",idx,maxneighs); 1540 ierr = ISLocalToGlobalMappingApply(map,nn,idxs,gidxs);CHKERRQ(ierr); 1541 ierr = VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1542 } 1543 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1544 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1545 if (vl2l) { 1546 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1547 } 1548 ierr = VecDestroy(&v);CHKERRQ(ierr); 1549 ierr = PetscFree2(gidxs,vals);CHKERRQ(ierr); 1550 1551 /* assemble near null space */ 1552 for (i=0;i<maxneighs;i++) { 1553 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1554 } 1555 for (i=0;i<maxneighs;i++) { 1556 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1557 ierr = VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");CHKERRQ(ierr); 1558 ierr = VecLockReadPush(quad_vecs[i]);CHKERRQ(ierr); 1559 } 1560 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1561 PetscFunctionReturn(0); 1562 } 1563 1564 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1565 { 1566 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1567 PetscErrorCode ierr; 1568 1569 PetscFunctionBegin; 1570 if (primalv) { 1571 if (pcbddc->user_primal_vertices_local) { 1572 IS list[2], newp; 1573 1574 list[0] = primalv; 1575 list[1] = pcbddc->user_primal_vertices_local; 1576 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1577 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1578 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1579 pcbddc->user_primal_vertices_local = newp; 1580 } else { 1581 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1582 } 1583 } 1584 PetscFunctionReturn(0); 1585 } 1586 1587 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1588 { 1589 PetscInt f, *comp = (PetscInt *)ctx; 1590 1591 PetscFunctionBegin; 1592 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1593 PetscFunctionReturn(0); 1594 } 1595 1596 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1597 { 1598 PetscErrorCode ierr; 1599 Vec local,global; 1600 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1601 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1602 PetscBool monolithic = PETSC_FALSE; 1603 1604 PetscFunctionBegin; 1605 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1606 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1607 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1608 /* need to convert from global to local topology information and remove references to information in global ordering */ 1609 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1610 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1611 ierr = VecBindToCPU(global,PETSC_TRUE);CHKERRQ(ierr); 1612 ierr = VecBindToCPU(local,PETSC_TRUE);CHKERRQ(ierr); 1613 if (monolithic) { /* just get block size to properly compute vertices */ 1614 if (pcbddc->vertex_size == 1) { 1615 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1616 } 1617 goto boundary; 1618 } 1619 1620 if (pcbddc->user_provided_isfordofs) { 1621 if (pcbddc->n_ISForDofs) { 1622 PetscInt i; 1623 1624 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1625 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1626 PetscInt bs; 1627 1628 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1629 ierr = ISGetBlockSize(pcbddc->ISForDofs[i],&bs);CHKERRQ(ierr); 1630 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1631 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1632 } 1633 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1634 pcbddc->n_ISForDofs = 0; 1635 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1636 } 1637 } else { 1638 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1639 DM dm; 1640 1641 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1642 if (!dm) { 1643 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1644 } 1645 if (dm) { 1646 IS *fields; 1647 PetscInt nf,i; 1648 1649 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1650 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1651 for (i=0;i<nf;i++) { 1652 PetscInt bs; 1653 1654 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1655 ierr = ISGetBlockSize(fields[i],&bs);CHKERRQ(ierr); 1656 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1657 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1658 } 1659 ierr = PetscFree(fields);CHKERRQ(ierr); 1660 pcbddc->n_ISForDofsLocal = nf; 1661 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1662 PetscContainer c; 1663 1664 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1665 if (c) { 1666 MatISLocalFields lf; 1667 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1668 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1669 } else { /* fallback, create the default fields if bs > 1 */ 1670 PetscInt i, n = matis->A->rmap->n; 1671 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1672 if (i > 1) { 1673 pcbddc->n_ISForDofsLocal = i; 1674 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1675 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1676 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1677 } 1678 } 1679 } 1680 } 1681 } else { 1682 PetscInt i; 1683 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1684 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1685 } 1686 } 1687 } 1688 1689 boundary: 1690 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1691 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1692 } else if (pcbddc->DirichletBoundariesLocal) { 1693 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1694 } 1695 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1696 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1697 } else if (pcbddc->NeumannBoundariesLocal) { 1698 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1699 } 1700 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1701 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1702 } 1703 ierr = VecDestroy(&global);CHKERRQ(ierr); 1704 ierr = VecDestroy(&local);CHKERRQ(ierr); 1705 /* detect local disconnected subdomains if requested (use matis->A) */ 1706 if (pcbddc->detect_disconnected) { 1707 IS primalv = NULL; 1708 PetscInt i; 1709 PetscBool filter = pcbddc->detect_disconnected_filter; 1710 1711 for (i=0;i<pcbddc->n_local_subs;i++) { 1712 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1713 } 1714 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1715 ierr = PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1716 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1717 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1718 } 1719 /* early stage corner detection */ 1720 { 1721 DM dm; 1722 1723 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1724 if (!dm) { 1725 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1726 } 1727 if (dm) { 1728 PetscBool isda; 1729 1730 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1731 if (isda) { 1732 ISLocalToGlobalMapping l2l; 1733 IS corners; 1734 Mat lA; 1735 PetscBool gl,lo; 1736 1737 { 1738 Vec cvec; 1739 const PetscScalar *coords; 1740 PetscInt dof,n,cdim; 1741 PetscBool memc = PETSC_TRUE; 1742 1743 ierr = DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 1744 ierr = DMGetCoordinates(dm,&cvec);CHKERRQ(ierr); 1745 ierr = VecGetLocalSize(cvec,&n);CHKERRQ(ierr); 1746 ierr = VecGetBlockSize(cvec,&cdim);CHKERRQ(ierr); 1747 n /= cdim; 1748 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 1749 ierr = PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords);CHKERRQ(ierr); 1750 ierr = VecGetArrayRead(cvec,&coords);CHKERRQ(ierr); 1751 #if defined(PETSC_USE_COMPLEX) 1752 memc = PETSC_FALSE; 1753 #endif 1754 if (dof != 1) memc = PETSC_FALSE; 1755 if (memc) { 1756 ierr = PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof);CHKERRQ(ierr); 1757 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1758 PetscReal *bcoords = pcbddc->mat_graph->coords; 1759 PetscInt i, b, d; 1760 1761 for (i=0;i<n;i++) { 1762 for (b=0;b<dof;b++) { 1763 for (d=0;d<cdim;d++) { 1764 bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]); 1765 } 1766 } 1767 } 1768 } 1769 ierr = VecRestoreArrayRead(cvec,&coords);CHKERRQ(ierr); 1770 pcbddc->mat_graph->cdim = cdim; 1771 pcbddc->mat_graph->cnloc = dof*n; 1772 pcbddc->mat_graph->cloc = PETSC_FALSE; 1773 } 1774 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1775 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1776 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1777 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1778 lo = (PetscBool)(l2l && corners); 1779 ierr = MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1780 if (gl) { /* From PETSc's DMDA */ 1781 const PetscInt *idx; 1782 PetscInt dof,bs,*idxout,n; 1783 1784 ierr = DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 1785 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1786 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1787 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1788 if (bs == dof) { 1789 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1790 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1791 } else { /* the original DMDA local-to-local map have been modified */ 1792 PetscInt i,d; 1793 1794 ierr = PetscMalloc1(dof*n,&idxout);CHKERRQ(ierr); 1795 for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d; 1796 ierr = ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);CHKERRQ(ierr); 1797 1798 bs = 1; 1799 n *= dof; 1800 } 1801 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1802 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1803 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1804 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1805 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1806 pcbddc->corner_selected = PETSC_TRUE; 1807 pcbddc->corner_selection = PETSC_TRUE; 1808 } 1809 if (corners) { 1810 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1811 } 1812 } 1813 } 1814 } 1815 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1816 DM dm; 1817 1818 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1819 if (!dm) { 1820 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1821 } 1822 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1823 Vec vcoords; 1824 PetscSection section; 1825 PetscReal *coords; 1826 PetscInt d,cdim,nl,nf,**ctxs; 1827 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1828 1829 ierr = DMGetCoordinateDim(dm,&cdim);CHKERRQ(ierr); 1830 ierr = DMGetLocalSection(dm,§ion);CHKERRQ(ierr); 1831 ierr = PetscSectionGetNumFields(section,&nf);CHKERRQ(ierr); 1832 ierr = DMCreateGlobalVector(dm,&vcoords);CHKERRQ(ierr); 1833 ierr = VecGetLocalSize(vcoords,&nl);CHKERRQ(ierr); 1834 ierr = PetscMalloc1(nl*cdim,&coords);CHKERRQ(ierr); 1835 ierr = PetscMalloc2(nf,&funcs,nf,&ctxs);CHKERRQ(ierr); 1836 ierr = PetscMalloc1(nf,&ctxs[0]);CHKERRQ(ierr); 1837 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1838 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1839 for (d=0;d<cdim;d++) { 1840 PetscInt i; 1841 const PetscScalar *v; 1842 1843 for (i=0;i<nf;i++) ctxs[i][0] = d; 1844 ierr = DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);CHKERRQ(ierr); 1845 ierr = VecGetArrayRead(vcoords,&v);CHKERRQ(ierr); 1846 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1847 ierr = VecRestoreArrayRead(vcoords,&v);CHKERRQ(ierr); 1848 } 1849 ierr = VecDestroy(&vcoords);CHKERRQ(ierr); 1850 ierr = PCSetCoordinates(pc,cdim,nl,coords);CHKERRQ(ierr); 1851 ierr = PetscFree(coords);CHKERRQ(ierr); 1852 ierr = PetscFree(ctxs[0]);CHKERRQ(ierr); 1853 ierr = PetscFree2(funcs,ctxs);CHKERRQ(ierr); 1854 } 1855 } 1856 PetscFunctionReturn(0); 1857 } 1858 1859 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1860 { 1861 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1862 PetscErrorCode ierr; 1863 IS nis; 1864 const PetscInt *idxs; 1865 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1866 PetscBool *ld; 1867 1868 PetscFunctionBegin; 1869 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1870 if (mop == MPI_LAND) { 1871 /* init rootdata with true */ 1872 ld = (PetscBool*) matis->sf_rootdata; 1873 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1874 } else { 1875 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 1876 } 1877 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 1878 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1879 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1880 ld = (PetscBool*) matis->sf_leafdata; 1881 for (i=0;i<nd;i++) 1882 if (-1 < idxs[i] && idxs[i] < n) 1883 ld[idxs[i]] = PETSC_TRUE; 1884 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1885 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1886 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1887 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1888 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1889 if (mop == MPI_LAND) { 1890 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1891 } else { 1892 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1893 } 1894 for (i=0,nnd=0;i<n;i++) 1895 if (ld[i]) 1896 nidxs[nnd++] = i; 1897 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1898 ierr = ISDestroy(is);CHKERRQ(ierr); 1899 *is = nis; 1900 PetscFunctionReturn(0); 1901 } 1902 1903 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1904 { 1905 PC_IS *pcis = (PC_IS*)(pc->data); 1906 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1907 PetscErrorCode ierr; 1908 1909 PetscFunctionBegin; 1910 if (!pcbddc->benign_have_null) { 1911 PetscFunctionReturn(0); 1912 } 1913 if (pcbddc->ChangeOfBasisMatrix) { 1914 Vec swap; 1915 1916 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1917 swap = pcbddc->work_change; 1918 pcbddc->work_change = r; 1919 r = swap; 1920 } 1921 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1922 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1923 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1924 ierr = KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);CHKERRQ(ierr); 1925 ierr = VecSet(z,0.);CHKERRQ(ierr); 1926 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1927 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1928 if (pcbddc->ChangeOfBasisMatrix) { 1929 pcbddc->work_change = r; 1930 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1931 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1932 } 1933 PetscFunctionReturn(0); 1934 } 1935 1936 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1937 { 1938 PCBDDCBenignMatMult_ctx ctx; 1939 PetscErrorCode ierr; 1940 PetscBool apply_right,apply_left,reset_x; 1941 1942 PetscFunctionBegin; 1943 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1944 if (transpose) { 1945 apply_right = ctx->apply_left; 1946 apply_left = ctx->apply_right; 1947 } else { 1948 apply_right = ctx->apply_right; 1949 apply_left = ctx->apply_left; 1950 } 1951 reset_x = PETSC_FALSE; 1952 if (apply_right) { 1953 const PetscScalar *ax; 1954 PetscInt nl,i; 1955 1956 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1957 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1958 ierr = PetscArraycpy(ctx->work,ax,nl);CHKERRQ(ierr); 1959 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1960 for (i=0;i<ctx->benign_n;i++) { 1961 PetscScalar sum,val; 1962 const PetscInt *idxs; 1963 PetscInt nz,j; 1964 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1965 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1966 sum = 0.; 1967 if (ctx->apply_p0) { 1968 val = ctx->work[idxs[nz-1]]; 1969 for (j=0;j<nz-1;j++) { 1970 sum += ctx->work[idxs[j]]; 1971 ctx->work[idxs[j]] += val; 1972 } 1973 } else { 1974 for (j=0;j<nz-1;j++) { 1975 sum += ctx->work[idxs[j]]; 1976 } 1977 } 1978 ctx->work[idxs[nz-1]] -= sum; 1979 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1980 } 1981 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1982 reset_x = PETSC_TRUE; 1983 } 1984 if (transpose) { 1985 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1986 } else { 1987 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1988 } 1989 if (reset_x) { 1990 ierr = VecResetArray(x);CHKERRQ(ierr); 1991 } 1992 if (apply_left) { 1993 PetscScalar *ay; 1994 PetscInt i; 1995 1996 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1997 for (i=0;i<ctx->benign_n;i++) { 1998 PetscScalar sum,val; 1999 const PetscInt *idxs; 2000 PetscInt nz,j; 2001 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2002 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2003 val = -ay[idxs[nz-1]]; 2004 if (ctx->apply_p0) { 2005 sum = 0.; 2006 for (j=0;j<nz-1;j++) { 2007 sum += ay[idxs[j]]; 2008 ay[idxs[j]] += val; 2009 } 2010 ay[idxs[nz-1]] += sum; 2011 } else { 2012 for (j=0;j<nz-1;j++) { 2013 ay[idxs[j]] += val; 2014 } 2015 ay[idxs[nz-1]] = 0.; 2016 } 2017 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2018 } 2019 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 2020 } 2021 PetscFunctionReturn(0); 2022 } 2023 2024 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2025 { 2026 PetscErrorCode ierr; 2027 2028 PetscFunctionBegin; 2029 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 2030 PetscFunctionReturn(0); 2031 } 2032 2033 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2034 { 2035 PetscErrorCode ierr; 2036 2037 PetscFunctionBegin; 2038 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 2039 PetscFunctionReturn(0); 2040 } 2041 2042 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2043 { 2044 PC_IS *pcis = (PC_IS*)pc->data; 2045 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2046 PCBDDCBenignMatMult_ctx ctx; 2047 PetscErrorCode ierr; 2048 2049 PetscFunctionBegin; 2050 if (!restore) { 2051 Mat A_IB,A_BI; 2052 PetscScalar *work; 2053 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2054 2055 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2056 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2057 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2058 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2059 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2060 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2061 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2062 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2063 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2064 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2065 ctx->apply_left = PETSC_TRUE; 2066 ctx->apply_right = PETSC_FALSE; 2067 ctx->apply_p0 = PETSC_FALSE; 2068 ctx->benign_n = pcbddc->benign_n; 2069 if (reuse) { 2070 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2071 ctx->free = PETSC_FALSE; 2072 } else { /* TODO: could be optimized for successive solves */ 2073 ISLocalToGlobalMapping N_to_D; 2074 PetscInt i; 2075 2076 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2077 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2078 for (i=0;i<pcbddc->benign_n;i++) { 2079 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2080 } 2081 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2082 ctx->free = PETSC_TRUE; 2083 } 2084 ctx->A = pcis->A_IB; 2085 ctx->work = work; 2086 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2087 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2088 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2089 pcis->A_IB = A_IB; 2090 2091 /* A_BI as A_IB^T */ 2092 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2093 pcbddc->benign_original_mat = pcis->A_BI; 2094 pcis->A_BI = A_BI; 2095 } else { 2096 if (!pcbddc->benign_original_mat) { 2097 PetscFunctionReturn(0); 2098 } 2099 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2100 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2101 pcis->A_IB = ctx->A; 2102 ctx->A = NULL; 2103 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2104 pcis->A_BI = pcbddc->benign_original_mat; 2105 pcbddc->benign_original_mat = NULL; 2106 if (ctx->free) { 2107 PetscInt i; 2108 for (i=0;i<ctx->benign_n;i++) { 2109 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2110 } 2111 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2112 } 2113 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2114 ierr = PetscFree(ctx);CHKERRQ(ierr); 2115 } 2116 PetscFunctionReturn(0); 2117 } 2118 2119 /* used just in bddc debug mode */ 2120 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2121 { 2122 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2123 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2124 Mat An; 2125 PetscErrorCode ierr; 2126 2127 PetscFunctionBegin; 2128 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2129 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2130 if (is1) { 2131 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2132 ierr = MatDestroy(&An);CHKERRQ(ierr); 2133 } else { 2134 *B = An; 2135 } 2136 PetscFunctionReturn(0); 2137 } 2138 2139 /* TODO: add reuse flag */ 2140 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2141 { 2142 Mat Bt; 2143 PetscScalar *a,*bdata; 2144 const PetscInt *ii,*ij; 2145 PetscInt m,n,i,nnz,*bii,*bij; 2146 PetscBool flg_row; 2147 PetscErrorCode ierr; 2148 2149 PetscFunctionBegin; 2150 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2151 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2152 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2153 nnz = n; 2154 for (i=0;i<ii[n];i++) { 2155 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2156 } 2157 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2158 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2159 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2160 nnz = 0; 2161 bii[0] = 0; 2162 for (i=0;i<n;i++) { 2163 PetscInt j; 2164 for (j=ii[i];j<ii[i+1];j++) { 2165 PetscScalar entry = a[j]; 2166 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2167 bij[nnz] = ij[j]; 2168 bdata[nnz] = entry; 2169 nnz++; 2170 } 2171 } 2172 bii[i+1] = nnz; 2173 } 2174 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2175 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2176 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2177 { 2178 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2179 b->free_a = PETSC_TRUE; 2180 b->free_ij = PETSC_TRUE; 2181 } 2182 if (*B == A) { 2183 ierr = MatDestroy(&A);CHKERRQ(ierr); 2184 } 2185 *B = Bt; 2186 PetscFunctionReturn(0); 2187 } 2188 2189 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv) 2190 { 2191 Mat B = NULL; 2192 DM dm; 2193 IS is_dummy,*cc_n; 2194 ISLocalToGlobalMapping l2gmap_dummy; 2195 PCBDDCGraph graph; 2196 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2197 PetscInt i,n; 2198 PetscInt *xadj,*adjncy; 2199 PetscBool isplex = PETSC_FALSE; 2200 PetscErrorCode ierr; 2201 2202 PetscFunctionBegin; 2203 if (ncc) *ncc = 0; 2204 if (cc) *cc = NULL; 2205 if (primalv) *primalv = NULL; 2206 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2207 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2208 if (!dm) { 2209 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2210 } 2211 if (dm) { 2212 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2213 } 2214 if (filter) isplex = PETSC_FALSE; 2215 2216 if (isplex) { /* this code has been modified from plexpartition.c */ 2217 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2218 PetscInt *adj = NULL; 2219 IS cellNumbering; 2220 const PetscInt *cellNum; 2221 PetscBool useCone, useClosure; 2222 PetscSection section; 2223 PetscSegBuffer adjBuffer; 2224 PetscSF sfPoint; 2225 PetscErrorCode ierr; 2226 2227 PetscFunctionBegin; 2228 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2229 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2230 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2231 /* Build adjacency graph via a section/segbuffer */ 2232 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2233 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2234 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2235 /* Always use FVM adjacency to create partitioner graph */ 2236 ierr = DMGetBasicAdjacency(dm, &useCone, &useClosure);CHKERRQ(ierr); 2237 ierr = DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);CHKERRQ(ierr); 2238 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2239 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2240 for (n = 0, p = pStart; p < pEnd; p++) { 2241 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2242 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2243 adjSize = PETSC_DETERMINE; 2244 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2245 for (a = 0; a < adjSize; ++a) { 2246 const PetscInt point = adj[a]; 2247 if (pStart <= point && point < pEnd) { 2248 PetscInt *PETSC_RESTRICT pBuf; 2249 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2250 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2251 *pBuf = point; 2252 } 2253 } 2254 n++; 2255 } 2256 ierr = DMSetBasicAdjacency(dm, useCone, useClosure);CHKERRQ(ierr); 2257 /* Derive CSR graph from section/segbuffer */ 2258 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2259 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2260 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2261 for (idx = 0, p = pStart; p < pEnd; p++) { 2262 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2263 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2264 } 2265 xadj[n] = size; 2266 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2267 /* Clean up */ 2268 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2269 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2270 ierr = PetscFree(adj);CHKERRQ(ierr); 2271 graph->xadj = xadj; 2272 graph->adjncy = adjncy; 2273 } else { 2274 Mat A; 2275 PetscBool isseqaij, flg_row; 2276 2277 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2278 if (!A->rmap->N || !A->cmap->N) { 2279 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2280 PetscFunctionReturn(0); 2281 } 2282 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2283 if (!isseqaij && filter) { 2284 PetscBool isseqdense; 2285 2286 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2287 if (!isseqdense) { 2288 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2289 } else { /* TODO: rectangular case and LDA */ 2290 PetscScalar *array; 2291 PetscReal chop=1.e-6; 2292 2293 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2294 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2295 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2296 for (i=0;i<n;i++) { 2297 PetscInt j; 2298 for (j=i+1;j<n;j++) { 2299 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2300 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2301 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2302 } 2303 } 2304 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2305 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2306 } 2307 } else { 2308 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2309 B = A; 2310 } 2311 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2312 2313 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2314 if (filter) { 2315 PetscScalar *data; 2316 PetscInt j,cum; 2317 2318 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2319 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2320 cum = 0; 2321 for (i=0;i<n;i++) { 2322 PetscInt t; 2323 2324 for (j=xadj[i];j<xadj[i+1];j++) { 2325 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2326 continue; 2327 } 2328 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2329 } 2330 t = xadj_filtered[i]; 2331 xadj_filtered[i] = cum; 2332 cum += t; 2333 } 2334 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2335 graph->xadj = xadj_filtered; 2336 graph->adjncy = adjncy_filtered; 2337 } else { 2338 graph->xadj = xadj; 2339 graph->adjncy = adjncy; 2340 } 2341 } 2342 /* compute local connected components using PCBDDCGraph */ 2343 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2344 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2345 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2346 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2347 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2348 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2349 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2350 2351 /* partial clean up */ 2352 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2353 if (B) { 2354 PetscBool flg_row; 2355 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2356 ierr = MatDestroy(&B);CHKERRQ(ierr); 2357 } 2358 if (isplex) { 2359 ierr = PetscFree(xadj);CHKERRQ(ierr); 2360 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2361 } 2362 2363 /* get back data */ 2364 if (isplex) { 2365 if (ncc) *ncc = graph->ncc; 2366 if (cc || primalv) { 2367 Mat A; 2368 PetscBT btv,btvt; 2369 PetscSection subSection; 2370 PetscInt *ids,cum,cump,*cids,*pids; 2371 2372 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2373 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2374 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2375 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2376 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2377 2378 cids[0] = 0; 2379 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2380 PetscInt j; 2381 2382 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2383 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2384 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2385 2386 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2387 for (k = 0; k < 2*size; k += 2) { 2388 PetscInt s, pp, p = closure[k], off, dof, cdof; 2389 2390 ierr = PetscSectionGetConstraintDof(subSection,p,&cdof);CHKERRQ(ierr); 2391 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2392 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2393 for (s = 0; s < dof-cdof; s++) { 2394 if (PetscBTLookupSet(btvt,off+s)) continue; 2395 if (!PetscBTLookup(btv,off+s)) { 2396 ids[cum++] = off+s; 2397 } else { /* cross-vertex */ 2398 pids[cump++] = off+s; 2399 } 2400 } 2401 ierr = DMPlexGetTreeParent(dm,p,&pp,NULL);CHKERRQ(ierr); 2402 if (pp != p) { 2403 ierr = PetscSectionGetConstraintDof(subSection,pp,&cdof);CHKERRQ(ierr); 2404 ierr = PetscSectionGetOffset(subSection,pp,&off);CHKERRQ(ierr); 2405 ierr = PetscSectionGetDof(subSection,pp,&dof);CHKERRQ(ierr); 2406 for (s = 0; s < dof-cdof; s++) { 2407 if (PetscBTLookupSet(btvt,off+s)) continue; 2408 if (!PetscBTLookup(btv,off+s)) { 2409 ids[cum++] = off+s; 2410 } else { /* cross-vertex */ 2411 pids[cump++] = off+s; 2412 } 2413 } 2414 } 2415 } 2416 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2417 } 2418 cids[i+1] = cum; 2419 /* mark dofs as already assigned */ 2420 for (j = cids[i]; j < cids[i+1]; j++) { 2421 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2422 } 2423 } 2424 if (cc) { 2425 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2426 for (i = 0; i < graph->ncc; i++) { 2427 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2428 } 2429 *cc = cc_n; 2430 } 2431 if (primalv) { 2432 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2433 } 2434 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2435 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2436 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2437 } 2438 } else { 2439 if (ncc) *ncc = graph->ncc; 2440 if (cc) { 2441 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2442 for (i=0;i<graph->ncc;i++) { 2443 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2444 } 2445 *cc = cc_n; 2446 } 2447 } 2448 /* clean up graph */ 2449 graph->xadj = NULL; 2450 graph->adjncy = NULL; 2451 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2456 { 2457 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2458 PC_IS* pcis = (PC_IS*)(pc->data); 2459 IS dirIS = NULL; 2460 PetscInt i; 2461 PetscErrorCode ierr; 2462 2463 PetscFunctionBegin; 2464 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2465 if (zerodiag) { 2466 Mat A; 2467 Vec vec3_N; 2468 PetscScalar *vals; 2469 const PetscInt *idxs; 2470 PetscInt nz,*count; 2471 2472 /* p0 */ 2473 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2474 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2475 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2476 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2477 for (i=0;i<nz;i++) vals[i] = 1.; 2478 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2479 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2480 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2481 /* v_I */ 2482 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2483 for (i=0;i<nz;i++) vals[i] = 0.; 2484 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2485 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2486 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2487 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2488 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2489 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2490 if (dirIS) { 2491 PetscInt n; 2492 2493 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2494 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2495 for (i=0;i<n;i++) vals[i] = 0.; 2496 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2497 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2498 } 2499 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2500 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2501 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2502 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2503 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2504 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2505 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2506 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2507 ierr = PetscFree(vals);CHKERRQ(ierr); 2508 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2509 2510 /* there should not be any pressure dofs lying on the interface */ 2511 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2512 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2513 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2514 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2515 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2516 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]); 2517 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2518 ierr = PetscFree(count);CHKERRQ(ierr); 2519 } 2520 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2521 2522 /* check PCBDDCBenignGetOrSetP0 */ 2523 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2524 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2525 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2526 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2527 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2528 for (i=0;i<pcbddc->benign_n;i++) { 2529 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2530 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2531 } 2532 PetscFunctionReturn(0); 2533 } 2534 2535 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2536 { 2537 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2538 IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs; 2539 PetscInt nz,n,benign_n,bsp = 1; 2540 PetscInt *interior_dofs,n_interior_dofs,nneu; 2541 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2542 PetscErrorCode ierr; 2543 2544 PetscFunctionBegin; 2545 if (reuse) goto project_b0; 2546 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2547 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2548 for (n=0;n<pcbddc->benign_n;n++) { 2549 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2550 } 2551 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2552 has_null_pressures = PETSC_TRUE; 2553 have_null = PETSC_TRUE; 2554 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2555 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2556 Checks if all the pressure dofs in each subdomain have a zero diagonal 2557 If not, a change of basis on pressures is not needed 2558 since the local Schur complements are already SPD 2559 */ 2560 if (pcbddc->n_ISForDofsLocal) { 2561 IS iP = NULL; 2562 PetscInt p,*pp; 2563 PetscBool flg; 2564 2565 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp);CHKERRQ(ierr); 2566 n = pcbddc->n_ISForDofsLocal; 2567 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2568 ierr = PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg);CHKERRQ(ierr); 2569 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2570 if (!flg) { 2571 n = 1; 2572 pp[0] = pcbddc->n_ISForDofsLocal-1; 2573 } 2574 2575 bsp = 0; 2576 for (p=0;p<n;p++) { 2577 PetscInt bs; 2578 2579 if (pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]); 2580 ierr = ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);CHKERRQ(ierr); 2581 bsp += bs; 2582 } 2583 ierr = PetscMalloc1(bsp,&bzerodiag);CHKERRQ(ierr); 2584 bsp = 0; 2585 for (p=0;p<n;p++) { 2586 const PetscInt *idxs; 2587 PetscInt b,bs,npl,*bidxs; 2588 2589 ierr = ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);CHKERRQ(ierr); 2590 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl);CHKERRQ(ierr); 2591 ierr = ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);CHKERRQ(ierr); 2592 ierr = PetscMalloc1(npl/bs,&bidxs);CHKERRQ(ierr); 2593 for (b=0;b<bs;b++) { 2594 PetscInt i; 2595 2596 for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b]; 2597 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]);CHKERRQ(ierr); 2598 bsp++; 2599 } 2600 ierr = PetscFree(bidxs);CHKERRQ(ierr); 2601 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);CHKERRQ(ierr); 2602 } 2603 ierr = ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures);CHKERRQ(ierr); 2604 2605 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2606 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2607 if (iP) { 2608 IS newpressures; 2609 2610 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2611 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2612 pressures = newpressures; 2613 } 2614 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2615 if (!sorted) { 2616 ierr = ISSort(pressures);CHKERRQ(ierr); 2617 } 2618 ierr = PetscFree(pp);CHKERRQ(ierr); 2619 } 2620 2621 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2622 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2623 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2624 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2625 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2626 if (!sorted) { 2627 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2628 } 2629 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2630 zerodiag_save = zerodiag; 2631 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2632 if (!nz) { 2633 if (n) have_null = PETSC_FALSE; 2634 has_null_pressures = PETSC_FALSE; 2635 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2636 } 2637 recompute_zerodiag = PETSC_FALSE; 2638 2639 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2640 zerodiag_subs = NULL; 2641 benign_n = 0; 2642 n_interior_dofs = 0; 2643 interior_dofs = NULL; 2644 nneu = 0; 2645 if (pcbddc->NeumannBoundariesLocal) { 2646 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2647 } 2648 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2649 if (checkb) { /* need to compute interior nodes */ 2650 PetscInt n,i,j; 2651 PetscInt n_neigh,*neigh,*n_shared,**shared; 2652 PetscInt *iwork; 2653 2654 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2655 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2656 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2657 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2658 for (i=1;i<n_neigh;i++) 2659 for (j=0;j<n_shared[i];j++) 2660 iwork[shared[i][j]] += 1; 2661 for (i=0;i<n;i++) 2662 if (!iwork[i]) 2663 interior_dofs[n_interior_dofs++] = i; 2664 ierr = PetscFree(iwork);CHKERRQ(ierr); 2665 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2666 } 2667 if (has_null_pressures) { 2668 IS *subs; 2669 PetscInt nsubs,i,j,nl; 2670 const PetscInt *idxs; 2671 PetscScalar *array; 2672 Vec *work; 2673 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2674 2675 subs = pcbddc->local_subs; 2676 nsubs = pcbddc->n_local_subs; 2677 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2678 if (checkb) { 2679 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2680 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2681 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2682 /* work[0] = 1_p */ 2683 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2684 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2685 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2686 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2687 /* work[0] = 1_v */ 2688 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2689 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2690 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2691 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2692 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2693 } 2694 2695 if (nsubs > 1 || bsp > 1) { 2696 IS *is; 2697 PetscInt b,totb; 2698 2699 totb = bsp; 2700 is = bsp > 1 ? bzerodiag : &zerodiag; 2701 nsubs = PetscMax(nsubs,1); 2702 ierr = PetscCalloc1(nsubs*totb,&zerodiag_subs);CHKERRQ(ierr); 2703 for (b=0;b<totb;b++) { 2704 for (i=0;i<nsubs;i++) { 2705 ISLocalToGlobalMapping l2g; 2706 IS t_zerodiag_subs; 2707 PetscInt nl; 2708 2709 if (subs) { 2710 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2711 } else { 2712 IS tis; 2713 2714 ierr = MatGetLocalSize(pcbddc->local_mat,&nl,NULL);CHKERRQ(ierr); 2715 ierr = ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis);CHKERRQ(ierr); 2716 ierr = ISLocalToGlobalMappingCreateIS(tis,&l2g);CHKERRQ(ierr); 2717 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2718 } 2719 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs);CHKERRQ(ierr); 2720 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2721 if (nl) { 2722 PetscBool valid = PETSC_TRUE; 2723 2724 if (checkb) { 2725 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2726 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2727 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2728 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2729 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2730 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2731 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2732 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2733 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2734 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2735 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2736 for (j=0;j<n_interior_dofs;j++) { 2737 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2738 valid = PETSC_FALSE; 2739 break; 2740 } 2741 } 2742 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2743 } 2744 if (valid && nneu) { 2745 const PetscInt *idxs; 2746 PetscInt nzb; 2747 2748 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2749 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2750 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2751 if (nzb) valid = PETSC_FALSE; 2752 } 2753 if (valid && pressures) { 2754 IS t_pressure_subs,tmp; 2755 PetscInt i1,i2; 2756 2757 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2758 ierr = ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp);CHKERRQ(ierr); 2759 ierr = ISGetLocalSize(tmp,&i1);CHKERRQ(ierr); 2760 ierr = ISGetLocalSize(t_zerodiag_subs,&i2);CHKERRQ(ierr); 2761 if (i2 != i1) valid = PETSC_FALSE; 2762 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2763 ierr = ISDestroy(&tmp);CHKERRQ(ierr); 2764 } 2765 if (valid) { 2766 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]);CHKERRQ(ierr); 2767 benign_n++; 2768 } else recompute_zerodiag = PETSC_TRUE; 2769 } 2770 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2771 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2772 } 2773 } 2774 } else { /* there's just one subdomain (or zero if they have not been detected */ 2775 PetscBool valid = PETSC_TRUE; 2776 2777 if (nneu) valid = PETSC_FALSE; 2778 if (valid && pressures) { 2779 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2780 } 2781 if (valid && checkb) { 2782 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2783 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2784 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2785 for (j=0;j<n_interior_dofs;j++) { 2786 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2787 valid = PETSC_FALSE; 2788 break; 2789 } 2790 } 2791 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2792 } 2793 if (valid) { 2794 benign_n = 1; 2795 ierr = PetscMalloc1(benign_n,&zerodiag_subs);CHKERRQ(ierr); 2796 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2797 zerodiag_subs[0] = zerodiag; 2798 } 2799 } 2800 if (checkb) { 2801 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2802 } 2803 } 2804 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2805 2806 if (!benign_n) { 2807 PetscInt n; 2808 2809 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2810 recompute_zerodiag = PETSC_FALSE; 2811 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2812 if (n) have_null = PETSC_FALSE; 2813 } 2814 2815 /* final check for null pressures */ 2816 if (zerodiag && pressures) { 2817 ierr = ISEqual(pressures,zerodiag,&have_null);CHKERRQ(ierr); 2818 } 2819 2820 if (recompute_zerodiag) { 2821 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2822 if (benign_n == 1) { 2823 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2824 zerodiag = zerodiag_subs[0]; 2825 } else { 2826 PetscInt i,nzn,*new_idxs; 2827 2828 nzn = 0; 2829 for (i=0;i<benign_n;i++) { 2830 PetscInt ns; 2831 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2832 nzn += ns; 2833 } 2834 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2835 nzn = 0; 2836 for (i=0;i<benign_n;i++) { 2837 PetscInt ns,*idxs; 2838 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2839 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2840 ierr = PetscArraycpy(new_idxs+nzn,idxs,ns);CHKERRQ(ierr); 2841 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2842 nzn += ns; 2843 } 2844 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2845 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2846 } 2847 have_null = PETSC_FALSE; 2848 } 2849 2850 /* determines if the coarse solver will be singular or not */ 2851 ierr = MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2852 2853 /* Prepare matrix to compute no-net-flux */ 2854 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2855 Mat A,loc_divudotp; 2856 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2857 IS row,col,isused = NULL; 2858 PetscInt M,N,n,st,n_isused; 2859 2860 if (pressures) { 2861 isused = pressures; 2862 } else { 2863 isused = zerodiag_save; 2864 } 2865 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2866 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2867 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2868 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2869 n_isused = 0; 2870 if (isused) { 2871 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2872 } 2873 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2874 st = st-n_isused; 2875 if (n) { 2876 const PetscInt *gidxs; 2877 2878 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2879 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2880 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2881 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2882 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2883 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2884 } else { 2885 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2886 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2887 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2888 } 2889 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2890 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2891 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2892 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2893 ierr = ISDestroy(&row);CHKERRQ(ierr); 2894 ierr = ISDestroy(&col);CHKERRQ(ierr); 2895 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2896 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2897 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2898 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2899 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2900 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2901 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2902 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2903 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2904 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2905 } 2906 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2907 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2908 if (bzerodiag) { 2909 PetscInt i; 2910 2911 for (i=0;i<bsp;i++) { 2912 ierr = ISDestroy(&bzerodiag[i]);CHKERRQ(ierr); 2913 } 2914 ierr = PetscFree(bzerodiag);CHKERRQ(ierr); 2915 } 2916 pcbddc->benign_n = benign_n; 2917 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2918 2919 /* determines if the problem has subdomains with 0 pressure block */ 2920 have_null = (PetscBool)(!!pcbddc->benign_n); 2921 ierr = MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2922 2923 project_b0: 2924 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2925 /* change of basis and p0 dofs */ 2926 if (pcbddc->benign_n) { 2927 PetscInt i,s,*nnz; 2928 2929 /* local change of basis for pressures */ 2930 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2931 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2932 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2933 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2934 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2935 for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */ 2936 for (i=0;i<pcbddc->benign_n;i++) { 2937 const PetscInt *idxs; 2938 PetscInt nzs,j; 2939 2940 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2941 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2942 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2943 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2944 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2945 } 2946 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2947 ierr = MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2948 ierr = PetscFree(nnz);CHKERRQ(ierr); 2949 /* set identity by default */ 2950 for (i=0;i<n;i++) { 2951 ierr = MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES);CHKERRQ(ierr); 2952 } 2953 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2954 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2955 /* set change on pressures */ 2956 for (s=0;s<pcbddc->benign_n;s++) { 2957 PetscScalar *array; 2958 const PetscInt *idxs; 2959 PetscInt nzs; 2960 2961 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2962 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2963 for (i=0;i<nzs-1;i++) { 2964 PetscScalar vals[2]; 2965 PetscInt cols[2]; 2966 2967 cols[0] = idxs[i]; 2968 cols[1] = idxs[nzs-1]; 2969 vals[0] = 1.; 2970 vals[1] = 1.; 2971 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2972 } 2973 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2974 for (i=0;i<nzs-1;i++) array[i] = -1.; 2975 array[nzs-1] = 1.; 2976 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2977 /* store local idxs for p0 */ 2978 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2979 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2980 ierr = PetscFree(array);CHKERRQ(ierr); 2981 } 2982 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2983 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2984 2985 /* project if needed */ 2986 if (pcbddc->benign_change_explicit) { 2987 Mat M; 2988 2989 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2990 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2991 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2992 ierr = MatDestroy(&M);CHKERRQ(ierr); 2993 } 2994 /* store global idxs for p0 */ 2995 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2996 } 2997 *zerodiaglocal = zerodiag; 2998 PetscFunctionReturn(0); 2999 } 3000 3001 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 3002 { 3003 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3004 PetscScalar *array; 3005 PetscErrorCode ierr; 3006 3007 PetscFunctionBegin; 3008 if (!pcbddc->benign_sf) { 3009 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 3010 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 3011 } 3012 if (get) { 3013 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 3014 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 3015 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 3016 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 3017 } else { 3018 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 3019 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 3020 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 3021 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 3022 } 3023 PetscFunctionReturn(0); 3024 } 3025 3026 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3027 { 3028 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3029 PetscErrorCode ierr; 3030 3031 PetscFunctionBegin; 3032 /* TODO: add error checking 3033 - avoid nested pop (or push) calls. 3034 - cannot push before pop. 3035 - cannot call this if pcbddc->local_mat is NULL 3036 */ 3037 if (!pcbddc->benign_n) { 3038 PetscFunctionReturn(0); 3039 } 3040 if (pop) { 3041 if (pcbddc->benign_change_explicit) { 3042 IS is_p0; 3043 MatReuse reuse; 3044 3045 /* extract B_0 */ 3046 reuse = MAT_INITIAL_MATRIX; 3047 if (pcbddc->benign_B0) { 3048 reuse = MAT_REUSE_MATRIX; 3049 } 3050 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 3051 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 3052 /* remove rows and cols from local problem */ 3053 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 3054 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3055 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 3056 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3057 } else { 3058 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3059 PetscScalar *vals; 3060 PetscInt i,n,*idxs_ins; 3061 3062 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 3063 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 3064 if (!pcbddc->benign_B0) { 3065 PetscInt *nnz; 3066 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 3067 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 3068 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 3069 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 3070 for (i=0;i<pcbddc->benign_n;i++) { 3071 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 3072 nnz[i] = n - nnz[i]; 3073 } 3074 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 3075 ierr = MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3076 ierr = PetscFree(nnz);CHKERRQ(ierr); 3077 } 3078 3079 for (i=0;i<pcbddc->benign_n;i++) { 3080 PetscScalar *array; 3081 PetscInt *idxs,j,nz,cum; 3082 3083 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 3084 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3085 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3086 for (j=0;j<nz;j++) vals[j] = 1.; 3087 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3088 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 3089 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 3090 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 3091 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 3092 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 3093 cum = 0; 3094 for (j=0;j<n;j++) { 3095 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3096 vals[cum] = array[j]; 3097 idxs_ins[cum] = j; 3098 cum++; 3099 } 3100 } 3101 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 3102 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 3103 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3104 } 3105 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3106 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3107 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 3108 } 3109 } else { /* push */ 3110 if (pcbddc->benign_change_explicit) { 3111 PetscInt i; 3112 3113 for (i=0;i<pcbddc->benign_n;i++) { 3114 PetscScalar *B0_vals; 3115 PetscInt *B0_cols,B0_ncol; 3116 3117 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3118 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3119 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3120 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 3121 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3122 } 3123 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3124 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3125 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!"); 3126 } 3127 PetscFunctionReturn(0); 3128 } 3129 3130 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3131 { 3132 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3133 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3134 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3135 PetscBLASInt *B_iwork,*B_ifail; 3136 PetscScalar *work,lwork; 3137 PetscScalar *St,*S,*eigv; 3138 PetscScalar *Sarray,*Starray; 3139 PetscReal *eigs,thresh,lthresh,uthresh; 3140 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3141 PetscBool allocated_S_St; 3142 #if defined(PETSC_USE_COMPLEX) 3143 PetscReal *rwork; 3144 #endif 3145 PetscErrorCode ierr; 3146 3147 PetscFunctionBegin; 3148 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3149 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3150 if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3151 ierr = PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3152 3153 if (pcbddc->dbg_flag) { 3154 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3155 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3156 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3157 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3158 } 3159 3160 if (pcbddc->dbg_flag) { 3161 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);CHKERRQ(ierr); 3162 } 3163 3164 /* max size of subsets */ 3165 mss = 0; 3166 for (i=0;i<sub_schurs->n_subs;i++) { 3167 PetscInt subset_size; 3168 3169 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3170 mss = PetscMax(mss,subset_size); 3171 } 3172 3173 /* min/max and threshold */ 3174 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3175 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3176 nmax = PetscMax(nmin,nmax); 3177 allocated_S_St = PETSC_FALSE; 3178 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3179 allocated_S_St = PETSC_TRUE; 3180 } 3181 3182 /* allocate lapack workspace */ 3183 cum = cum2 = 0; 3184 maxneigs = 0; 3185 for (i=0;i<sub_schurs->n_subs;i++) { 3186 PetscInt n,subset_size; 3187 3188 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3189 n = PetscMin(subset_size,nmax); 3190 cum += subset_size; 3191 cum2 += subset_size*n; 3192 maxneigs = PetscMax(maxneigs,n); 3193 } 3194 lwork = 0; 3195 if (mss) { 3196 if (sub_schurs->is_symmetric) { 3197 PetscScalar sdummy = 0.; 3198 PetscBLASInt B_itype = 1; 3199 PetscBLASInt B_N = mss, idummy = 0; 3200 PetscReal rdummy = 0.,zero = 0.0; 3201 PetscReal eps = 0.0; /* dlamch? */ 3202 3203 B_lwork = -1; 3204 /* some implementations may complain about NULL pointers, even if we are querying */ 3205 S = &sdummy; 3206 St = &sdummy; 3207 eigs = &rdummy; 3208 eigv = &sdummy; 3209 B_iwork = &idummy; 3210 B_ifail = &idummy; 3211 #if defined(PETSC_USE_COMPLEX) 3212 rwork = &rdummy; 3213 #endif 3214 thresh = 1.0; 3215 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3216 #if defined(PETSC_USE_COMPLEX) 3217 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3218 #else 3219 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3220 #endif 3221 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3222 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3223 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3224 } 3225 3226 nv = 0; 3227 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3228 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3229 } 3230 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3231 if (allocated_S_St) { 3232 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3233 } 3234 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3235 #if defined(PETSC_USE_COMPLEX) 3236 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3237 #endif 3238 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3239 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3240 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3241 nv+cum,&pcbddc->adaptive_constraints_idxs, 3242 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3243 ierr = PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs);CHKERRQ(ierr); 3244 3245 maxneigs = 0; 3246 cum = cumarray = 0; 3247 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3248 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3249 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3250 const PetscInt *idxs; 3251 3252 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3253 for (cum=0;cum<nv;cum++) { 3254 pcbddc->adaptive_constraints_n[cum] = 1; 3255 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3256 pcbddc->adaptive_constraints_data[cum] = 1.0; 3257 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3258 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3259 } 3260 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3261 } 3262 3263 if (mss) { /* multilevel */ 3264 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3265 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3266 } 3267 3268 lthresh = pcbddc->adaptive_threshold[0]; 3269 uthresh = pcbddc->adaptive_threshold[1]; 3270 for (i=0;i<sub_schurs->n_subs;i++) { 3271 const PetscInt *idxs; 3272 PetscReal upper,lower; 3273 PetscInt j,subset_size,eigs_start = 0; 3274 PetscBLASInt B_N; 3275 PetscBool same_data = PETSC_FALSE; 3276 PetscBool scal = PETSC_FALSE; 3277 3278 if (pcbddc->use_deluxe_scaling) { 3279 upper = PETSC_MAX_REAL; 3280 lower = uthresh; 3281 } else { 3282 if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3283 upper = 1./uthresh; 3284 lower = 0.; 3285 } 3286 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3287 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3288 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3289 /* this is experimental: we assume the dofs have been properly grouped to have 3290 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3291 if (!sub_schurs->is_posdef) { 3292 Mat T; 3293 3294 for (j=0;j<subset_size;j++) { 3295 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3296 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);CHKERRQ(ierr); 3297 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3298 ierr = MatDestroy(&T);CHKERRQ(ierr); 3299 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);CHKERRQ(ierr); 3300 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3301 ierr = MatDestroy(&T);CHKERRQ(ierr); 3302 if (sub_schurs->change_primal_sub) { 3303 PetscInt nz,k; 3304 const PetscInt *idxs; 3305 3306 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);CHKERRQ(ierr); 3307 ierr = ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3308 for (k=0;k<nz;k++) { 3309 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3310 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3311 } 3312 ierr = ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3313 } 3314 scal = PETSC_TRUE; 3315 break; 3316 } 3317 } 3318 } 3319 3320 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3321 if (sub_schurs->is_symmetric) { 3322 PetscInt j,k; 3323 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3324 ierr = PetscArrayzero(S,subset_size*subset_size);CHKERRQ(ierr); 3325 ierr = PetscArrayzero(St,subset_size*subset_size);CHKERRQ(ierr); 3326 } 3327 for (j=0;j<subset_size;j++) { 3328 for (k=j;k<subset_size;k++) { 3329 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3330 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3331 } 3332 } 3333 } else { 3334 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3335 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3336 } 3337 } else { 3338 S = Sarray + cumarray; 3339 St = Starray + cumarray; 3340 } 3341 /* see if we can save some work */ 3342 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3343 ierr = PetscArraycmp(S,St,subset_size*subset_size,&same_data);CHKERRQ(ierr); 3344 } 3345 3346 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3347 B_neigs = 0; 3348 } else { 3349 if (sub_schurs->is_symmetric) { 3350 PetscBLASInt B_itype = 1; 3351 PetscBLASInt B_IL, B_IU; 3352 PetscReal eps = -1.0; /* dlamch? */ 3353 PetscInt nmin_s; 3354 PetscBool compute_range; 3355 3356 B_neigs = 0; 3357 compute_range = (PetscBool)!same_data; 3358 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3359 3360 if (pcbddc->dbg_flag) { 3361 PetscInt nc = 0; 3362 3363 if (sub_schurs->change_primal_sub) { 3364 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);CHKERRQ(ierr); 3365 } 3366 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);CHKERRQ(ierr); 3367 } 3368 3369 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3370 if (compute_range) { 3371 3372 /* ask for eigenvalues larger than thresh */ 3373 if (sub_schurs->is_posdef) { 3374 #if defined(PETSC_USE_COMPLEX) 3375 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3376 #else 3377 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3378 #endif 3379 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3380 } else { /* no theory so far, but it works nicely */ 3381 PetscInt recipe = 0,recipe_m = 1; 3382 PetscReal bb[2]; 3383 3384 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);CHKERRQ(ierr); 3385 switch (recipe) { 3386 case 0: 3387 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3388 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3389 #if defined(PETSC_USE_COMPLEX) 3390 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3391 #else 3392 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3393 #endif 3394 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3395 break; 3396 case 1: 3397 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3398 #if defined(PETSC_USE_COMPLEX) 3399 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3400 #else 3401 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3402 #endif 3403 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3404 if (!scal) { 3405 PetscBLASInt B_neigs2 = 0; 3406 3407 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3408 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3409 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3410 #if defined(PETSC_USE_COMPLEX) 3411 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3412 #else 3413 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3414 #endif 3415 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3416 B_neigs += B_neigs2; 3417 } 3418 break; 3419 case 2: 3420 if (scal) { 3421 bb[0] = PETSC_MIN_REAL; 3422 bb[1] = 0; 3423 #if defined(PETSC_USE_COMPLEX) 3424 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3425 #else 3426 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3427 #endif 3428 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3429 } else { 3430 PetscBLASInt B_neigs2 = 0; 3431 PetscBool import = PETSC_FALSE; 3432 3433 lthresh = PetscMax(lthresh,0.0); 3434 if (lthresh > 0.0) { 3435 bb[0] = PETSC_MIN_REAL; 3436 bb[1] = lthresh*lthresh; 3437 3438 import = PETSC_TRUE; 3439 #if defined(PETSC_USE_COMPLEX) 3440 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3441 #else 3442 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3443 #endif 3444 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3445 } 3446 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3447 bb[1] = PETSC_MAX_REAL; 3448 if (import) { 3449 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3450 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3451 } 3452 #if defined(PETSC_USE_COMPLEX) 3453 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3454 #else 3455 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3456 #endif 3457 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3458 B_neigs += B_neigs2; 3459 } 3460 break; 3461 case 3: 3462 if (scal) { 3463 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);CHKERRQ(ierr); 3464 } else { 3465 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);CHKERRQ(ierr); 3466 } 3467 if (!scal) { 3468 bb[0] = uthresh; 3469 bb[1] = PETSC_MAX_REAL; 3470 #if defined(PETSC_USE_COMPLEX) 3471 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3472 #else 3473 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3474 #endif 3475 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3476 } 3477 if (recipe_m > 0 && B_N - B_neigs > 0) { 3478 PetscBLASInt B_neigs2 = 0; 3479 3480 B_IL = 1; 3481 ierr = PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);CHKERRQ(ierr); 3482 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3483 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3484 #if defined(PETSC_USE_COMPLEX) 3485 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3486 #else 3487 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3488 #endif 3489 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3490 B_neigs += B_neigs2; 3491 } 3492 break; 3493 case 4: 3494 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3495 #if defined(PETSC_USE_COMPLEX) 3496 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3497 #else 3498 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3499 #endif 3500 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3501 { 3502 PetscBLASInt B_neigs2 = 0; 3503 3504 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3505 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3506 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3507 #if defined(PETSC_USE_COMPLEX) 3508 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3509 #else 3510 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3511 #endif 3512 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3513 B_neigs += B_neigs2; 3514 } 3515 break; 3516 case 5: /* same as before: first compute all eigenvalues, then filter */ 3517 #if defined(PETSC_USE_COMPLEX) 3518 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3519 #else 3520 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3521 #endif 3522 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3523 { 3524 PetscInt e,k,ne; 3525 for (e=0,ne=0;e<B_neigs;e++) { 3526 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3527 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3528 eigs[ne] = eigs[e]; 3529 ne++; 3530 } 3531 } 3532 ierr = PetscArraycpy(eigv,S,B_N*ne);CHKERRQ(ierr); 3533 B_neigs = ne; 3534 } 3535 break; 3536 default: 3537 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3538 } 3539 } 3540 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3541 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3542 B_IL = 1; 3543 #if defined(PETSC_USE_COMPLEX) 3544 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3545 #else 3546 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3547 #endif 3548 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3549 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3550 PetscInt k; 3551 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3552 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3553 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3554 nmin = nmax; 3555 ierr = PetscArrayzero(eigv,subset_size*nmax);CHKERRQ(ierr); 3556 for (k=0;k<nmax;k++) { 3557 eigs[k] = 1./PETSC_SMALL; 3558 eigv[k*(subset_size+1)] = 1.0; 3559 } 3560 } 3561 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3562 if (B_ierr) { 3563 if (B_ierr < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3564 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3565 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3566 } 3567 3568 if (B_neigs > nmax) { 3569 if (pcbddc->dbg_flag) { 3570 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax);CHKERRQ(ierr); 3571 } 3572 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3573 B_neigs = nmax; 3574 } 3575 3576 nmin_s = PetscMin(nmin,B_N); 3577 if (B_neigs < nmin_s) { 3578 PetscBLASInt B_neigs2 = 0; 3579 3580 if (pcbddc->use_deluxe_scaling) { 3581 if (scal) { 3582 B_IU = nmin_s; 3583 B_IL = B_neigs + 1; 3584 } else { 3585 B_IL = B_N - nmin_s + 1; 3586 B_IU = B_N - B_neigs; 3587 } 3588 } else { 3589 B_IL = B_neigs + 1; 3590 B_IU = nmin_s; 3591 } 3592 if (pcbddc->dbg_flag) { 3593 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);CHKERRQ(ierr); 3594 } 3595 if (sub_schurs->is_symmetric) { 3596 PetscInt j,k; 3597 for (j=0;j<subset_size;j++) { 3598 for (k=j;k<subset_size;k++) { 3599 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3600 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3601 } 3602 } 3603 } else { 3604 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3605 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3606 } 3607 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3608 #if defined(PETSC_USE_COMPLEX) 3609 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3610 #else 3611 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3612 #endif 3613 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3614 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3615 B_neigs += B_neigs2; 3616 } 3617 if (B_ierr) { 3618 if (B_ierr < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3619 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3620 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3621 } 3622 if (pcbddc->dbg_flag) { 3623 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3624 for (j=0;j<B_neigs;j++) { 3625 if (eigs[j] == 0.0) { 3626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3627 } else { 3628 if (pcbddc->use_deluxe_scaling) { 3629 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3630 } else { 3631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3632 } 3633 } 3634 } 3635 } 3636 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3637 } 3638 /* change the basis back to the original one */ 3639 if (sub_schurs->change) { 3640 Mat change,phi,phit; 3641 3642 if (pcbddc->dbg_flag > 2) { 3643 PetscInt ii; 3644 for (ii=0;ii<B_neigs;ii++) { 3645 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3646 for (j=0;j<B_N;j++) { 3647 #if defined(PETSC_USE_COMPLEX) 3648 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3649 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3650 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3651 #else 3652 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3653 #endif 3654 } 3655 } 3656 } 3657 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3658 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3659 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3660 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3661 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3662 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3663 } 3664 maxneigs = PetscMax(B_neigs,maxneigs); 3665 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3666 if (B_neigs) { 3667 ierr = PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size);CHKERRQ(ierr); 3668 3669 if (pcbddc->dbg_flag > 1) { 3670 PetscInt ii; 3671 for (ii=0;ii<B_neigs;ii++) { 3672 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3673 for (j=0;j<B_N;j++) { 3674 #if defined(PETSC_USE_COMPLEX) 3675 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3676 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3677 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3678 #else 3679 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3680 #endif 3681 } 3682 } 3683 } 3684 ierr = PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size);CHKERRQ(ierr); 3685 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3686 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3687 cum++; 3688 } 3689 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3690 /* shift for next computation */ 3691 cumarray += subset_size*subset_size; 3692 } 3693 if (pcbddc->dbg_flag) { 3694 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3695 } 3696 3697 if (mss) { 3698 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3699 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3700 /* destroy matrices (junk) */ 3701 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3702 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3703 } 3704 if (allocated_S_St) { 3705 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3706 } 3707 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3708 #if defined(PETSC_USE_COMPLEX) 3709 ierr = PetscFree(rwork);CHKERRQ(ierr); 3710 #endif 3711 if (pcbddc->dbg_flag) { 3712 PetscInt maxneigs_r; 3713 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3714 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);CHKERRQ(ierr); 3715 } 3716 ierr = PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3717 PetscFunctionReturn(0); 3718 } 3719 3720 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3721 { 3722 PetscScalar *coarse_submat_vals; 3723 PetscErrorCode ierr; 3724 3725 PetscFunctionBegin; 3726 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3727 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3728 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3729 3730 /* Setup local neumann solver ksp_R */ 3731 /* PCBDDCSetUpLocalScatters should be called first! */ 3732 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3733 3734 /* 3735 Setup local correction and local part of coarse basis. 3736 Gives back the dense local part of the coarse matrix in column major ordering 3737 */ 3738 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3739 3740 /* Compute total number of coarse nodes and setup coarse solver */ 3741 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3742 3743 /* free */ 3744 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3745 PetscFunctionReturn(0); 3746 } 3747 3748 PetscErrorCode PCBDDCResetCustomization(PC pc) 3749 { 3750 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3751 PetscErrorCode ierr; 3752 3753 PetscFunctionBegin; 3754 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3755 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3756 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3757 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3758 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3759 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3760 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3761 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3762 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3763 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3764 PetscFunctionReturn(0); 3765 } 3766 3767 PetscErrorCode PCBDDCResetTopography(PC pc) 3768 { 3769 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3770 PetscInt i; 3771 PetscErrorCode ierr; 3772 3773 PetscFunctionBegin; 3774 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3775 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3776 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3777 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3778 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3779 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3780 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3781 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3782 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3783 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3784 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3785 for (i=0;i<pcbddc->n_local_subs;i++) { 3786 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3787 } 3788 pcbddc->n_local_subs = 0; 3789 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3790 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3791 pcbddc->graphanalyzed = PETSC_FALSE; 3792 pcbddc->recompute_topography = PETSC_TRUE; 3793 pcbddc->corner_selected = PETSC_FALSE; 3794 PetscFunctionReturn(0); 3795 } 3796 3797 PetscErrorCode PCBDDCResetSolvers(PC pc) 3798 { 3799 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3800 PetscErrorCode ierr; 3801 3802 PetscFunctionBegin; 3803 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3804 if (pcbddc->coarse_phi_B) { 3805 PetscScalar *array; 3806 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3807 ierr = PetscFree(array);CHKERRQ(ierr); 3808 } 3809 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3810 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3811 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3812 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3813 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3814 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3815 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3816 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3817 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3818 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3819 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3820 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3821 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3822 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3823 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3824 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3825 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3826 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3827 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3828 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3829 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3830 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3831 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3832 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3833 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3834 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3835 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3836 if (pcbddc->benign_zerodiag_subs) { 3837 PetscInt i; 3838 for (i=0;i<pcbddc->benign_n;i++) { 3839 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3840 } 3841 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3842 } 3843 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3844 PetscFunctionReturn(0); 3845 } 3846 3847 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3848 { 3849 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3850 PC_IS *pcis = (PC_IS*)pc->data; 3851 VecType impVecType; 3852 PetscInt n_constraints,n_R,old_size; 3853 PetscErrorCode ierr; 3854 3855 PetscFunctionBegin; 3856 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3857 n_R = pcis->n - pcbddc->n_vertices; 3858 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3859 /* local work vectors (try to avoid unneeded work)*/ 3860 /* R nodes */ 3861 old_size = -1; 3862 if (pcbddc->vec1_R) { 3863 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3864 } 3865 if (n_R != old_size) { 3866 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3867 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3868 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3869 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3870 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3871 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3872 } 3873 /* local primal dofs */ 3874 old_size = -1; 3875 if (pcbddc->vec1_P) { 3876 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3877 } 3878 if (pcbddc->local_primal_size != old_size) { 3879 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3880 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3881 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3882 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3883 } 3884 /* local explicit constraints */ 3885 old_size = -1; 3886 if (pcbddc->vec1_C) { 3887 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3888 } 3889 if (n_constraints && n_constraints != old_size) { 3890 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3891 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3892 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3893 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3894 } 3895 PetscFunctionReturn(0); 3896 } 3897 3898 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3899 { 3900 PetscErrorCode ierr; 3901 /* pointers to pcis and pcbddc */ 3902 PC_IS* pcis = (PC_IS*)pc->data; 3903 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3904 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3905 /* submatrices of local problem */ 3906 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3907 /* submatrices of local coarse problem */ 3908 Mat S_VV,S_CV,S_VC,S_CC; 3909 /* working matrices */ 3910 Mat C_CR; 3911 /* additional working stuff */ 3912 PC pc_R; 3913 Mat F,Brhs = NULL; 3914 Vec dummy_vec; 3915 PetscBool isLU,isCHOL,need_benign_correction,sparserhs; 3916 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3917 PetscScalar *work; 3918 PetscInt *idx_V_B; 3919 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3920 PetscInt i,n_R,n_D,n_B; 3921 PetscScalar one=1.0,m_one=-1.0; 3922 3923 PetscFunctionBegin; 3924 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3925 ierr = PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3926 3927 /* Set Non-overlapping dimensions */ 3928 n_vertices = pcbddc->n_vertices; 3929 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3930 n_B = pcis->n_B; 3931 n_D = pcis->n - n_B; 3932 n_R = pcis->n - n_vertices; 3933 3934 /* vertices in boundary numbering */ 3935 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3936 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3937 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i); 3938 3939 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3940 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3941 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3942 ierr = MatDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3943 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3944 ierr = MatDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3945 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3946 ierr = MatDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3947 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3948 ierr = MatDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3949 3950 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3951 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3952 ierr = PCSetUp(pc_R);CHKERRQ(ierr); 3953 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3954 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3955 lda_rhs = n_R; 3956 need_benign_correction = PETSC_FALSE; 3957 if (isLU || isCHOL) { 3958 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3959 } else if (sub_schurs && sub_schurs->reuse_solver) { 3960 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3961 MatFactorType type; 3962 3963 F = reuse_solver->F; 3964 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3965 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3966 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3967 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3968 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3969 } else F = NULL; 3970 3971 /* determine if we can use a sparse right-hand side */ 3972 sparserhs = PETSC_FALSE; 3973 if (F) { 3974 MatSolverType solver; 3975 3976 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3977 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3978 } 3979 3980 /* allocate workspace */ 3981 n = 0; 3982 if (n_constraints) { 3983 n += lda_rhs*n_constraints; 3984 } 3985 if (n_vertices) { 3986 n = PetscMax(2*lda_rhs*n_vertices,n); 3987 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3988 } 3989 if (!pcbddc->symmetric_primal) { 3990 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3991 } 3992 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3993 3994 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3995 dummy_vec = NULL; 3996 if (need_benign_correction && lda_rhs != n_R && F) { 3997 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);CHKERRQ(ierr); 3998 ierr = VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);CHKERRQ(ierr); 3999 ierr = VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);CHKERRQ(ierr); 4000 } 4001 4002 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 4003 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 4004 4005 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 4006 if (n_constraints) { 4007 Mat M3,C_B; 4008 IS is_aux; 4009 PetscScalar *array,*array2; 4010 4011 /* Extract constraints on R nodes: C_{CR} */ 4012 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 4013 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 4014 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4015 4016 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4017 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4018 if (!sparserhs) { 4019 ierr = PetscArrayzero(work,lda_rhs*n_constraints);CHKERRQ(ierr); 4020 for (i=0;i<n_constraints;i++) { 4021 const PetscScalar *row_cmat_values; 4022 const PetscInt *row_cmat_indices; 4023 PetscInt size_of_constraint,j; 4024 4025 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 4026 for (j=0;j<size_of_constraint;j++) { 4027 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 4028 } 4029 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 4030 } 4031 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 4032 } else { 4033 Mat tC_CR; 4034 4035 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 4036 if (lda_rhs != n_R) { 4037 PetscScalar *aa; 4038 PetscInt r,*ii,*jj; 4039 PetscBool done; 4040 4041 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4042 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4043 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 4044 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 4045 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4046 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4047 } else { 4048 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 4049 tC_CR = C_CR; 4050 } 4051 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 4052 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 4053 } 4054 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 4055 if (F) { 4056 if (need_benign_correction) { 4057 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4058 4059 /* rhs is already zero on interior dofs, no need to change the rhs */ 4060 ierr = PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n);CHKERRQ(ierr); 4061 } 4062 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 4063 if (need_benign_correction) { 4064 PetscScalar *marr; 4065 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4066 4067 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4068 if (lda_rhs != n_R) { 4069 for (i=0;i<n_constraints;i++) { 4070 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4071 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4072 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4073 } 4074 } else { 4075 for (i=0;i<n_constraints;i++) { 4076 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4077 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4078 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4079 } 4080 } 4081 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4082 } 4083 } else { 4084 PetscScalar *marr; 4085 4086 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4087 for (i=0;i<n_constraints;i++) { 4088 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4089 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 4090 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4091 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4092 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4093 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4094 } 4095 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4096 } 4097 if (sparserhs) { 4098 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 4099 } 4100 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4101 if (!pcbddc->switch_static) { 4102 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4103 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 4104 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 4105 for (i=0;i<n_constraints;i++) { 4106 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 4107 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 4108 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4109 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4110 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4111 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4112 } 4113 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 4114 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 4115 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4116 } else { 4117 if (lda_rhs != n_R) { 4118 IS dummy; 4119 4120 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 4121 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4122 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 4123 } else { 4124 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 4125 pcbddc->local_auxmat2 = local_auxmat2_R; 4126 } 4127 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4128 } 4129 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4130 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4131 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 4132 if (isCHOL) { 4133 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 4134 } else { 4135 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 4136 } 4137 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 4138 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4139 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4140 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4141 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4142 ierr = MatDestroy(&M3);CHKERRQ(ierr); 4143 } 4144 4145 /* Get submatrices from subdomain matrix */ 4146 if (n_vertices) { 4147 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4148 PetscBool oldpin; 4149 #endif 4150 PetscBool isaij; 4151 IS is_aux; 4152 4153 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4154 IS tis; 4155 4156 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 4157 ierr = ISSort(tis);CHKERRQ(ierr); 4158 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 4159 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4160 } else { 4161 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 4162 } 4163 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4164 oldpin = pcbddc->local_mat->boundtocpu; 4165 #endif 4166 ierr = MatBindToCPU(pcbddc->local_mat,PETSC_TRUE);CHKERRQ(ierr); 4167 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 4168 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 4169 ierr = PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij);CHKERRQ(ierr); 4170 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4171 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4172 } 4173 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 4174 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4175 ierr = MatBindToCPU(pcbddc->local_mat,oldpin);CHKERRQ(ierr); 4176 #endif 4177 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4178 } 4179 4180 /* Matrix of coarse basis functions (local) */ 4181 if (pcbddc->coarse_phi_B) { 4182 PetscInt on_B,on_primal,on_D=n_D; 4183 if (pcbddc->coarse_phi_D) { 4184 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 4185 } 4186 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 4187 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4188 PetscScalar *marray; 4189 4190 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 4191 ierr = PetscFree(marray);CHKERRQ(ierr); 4192 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4193 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4194 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4195 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4196 } 4197 } 4198 4199 if (!pcbddc->coarse_phi_B) { 4200 PetscScalar *marr; 4201 4202 /* memory size */ 4203 n = n_B*pcbddc->local_primal_size; 4204 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4205 if (!pcbddc->symmetric_primal) n *= 2; 4206 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 4207 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4208 marr += n_B*pcbddc->local_primal_size; 4209 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4210 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4211 marr += n_D*pcbddc->local_primal_size; 4212 } 4213 if (!pcbddc->symmetric_primal) { 4214 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4215 marr += n_B*pcbddc->local_primal_size; 4216 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4217 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4218 } 4219 } else { 4220 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 4221 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4222 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4223 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 4224 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4225 } 4226 } 4227 } 4228 4229 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4230 p0_lidx_I = NULL; 4231 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4232 const PetscInt *idxs; 4233 4234 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4235 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 4236 for (i=0;i<pcbddc->benign_n;i++) { 4237 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 4238 } 4239 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4240 } 4241 4242 /* vertices */ 4243 if (n_vertices) { 4244 PetscBool restoreavr = PETSC_FALSE; 4245 4246 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 4247 4248 if (n_R) { 4249 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4250 PetscBLASInt B_N,B_one = 1; 4251 const PetscScalar *x; 4252 PetscScalar *y; 4253 4254 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 4255 if (need_benign_correction) { 4256 ISLocalToGlobalMapping RtoN; 4257 IS is_p0; 4258 PetscInt *idxs_p0,n; 4259 4260 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 4261 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 4262 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 4263 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n); 4264 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 4265 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 4266 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 4267 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 4268 } 4269 4270 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4271 if (!sparserhs || need_benign_correction) { 4272 if (lda_rhs == n_R) { 4273 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4274 } else { 4275 PetscScalar *av,*array; 4276 const PetscInt *xadj,*adjncy; 4277 PetscInt n; 4278 PetscBool flg_row; 4279 4280 array = work+lda_rhs*n_vertices; 4281 ierr = PetscArrayzero(array,lda_rhs*n_vertices);CHKERRQ(ierr); 4282 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4283 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4284 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 4285 for (i=0;i<n;i++) { 4286 PetscInt j; 4287 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4288 } 4289 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4290 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4291 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 4292 } 4293 if (need_benign_correction) { 4294 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4295 PetscScalar *marr; 4296 4297 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 4298 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4299 4300 | 0 0 0 | (V) 4301 L = | 0 0 -1 | (P-p0) 4302 | 0 0 -1 | (p0) 4303 4304 */ 4305 for (i=0;i<reuse_solver->benign_n;i++) { 4306 const PetscScalar *vals; 4307 const PetscInt *idxs,*idxs_zero; 4308 PetscInt n,j,nz; 4309 4310 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4311 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4312 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4313 for (j=0;j<n;j++) { 4314 PetscScalar val = vals[j]; 4315 PetscInt k,col = idxs[j]; 4316 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4317 } 4318 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4319 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4320 } 4321 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 4322 } 4323 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 4324 Brhs = A_RV; 4325 } else { 4326 Mat tA_RVT,A_RVT; 4327 4328 if (!pcbddc->symmetric_primal) { 4329 /* A_RV already scaled by -1 */ 4330 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 4331 } else { 4332 restoreavr = PETSC_TRUE; 4333 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4334 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 4335 A_RVT = A_VR; 4336 } 4337 if (lda_rhs != n_R) { 4338 PetscScalar *aa; 4339 PetscInt r,*ii,*jj; 4340 PetscBool done; 4341 4342 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4343 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4344 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4345 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4346 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4347 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4348 } else { 4349 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4350 tA_RVT = A_RVT; 4351 } 4352 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4353 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4354 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4355 } 4356 if (F) { 4357 /* need to correct the rhs */ 4358 if (need_benign_correction) { 4359 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4360 PetscScalar *marr; 4361 4362 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4363 if (lda_rhs != n_R) { 4364 for (i=0;i<n_vertices;i++) { 4365 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4366 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4367 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4368 } 4369 } else { 4370 for (i=0;i<n_vertices;i++) { 4371 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4372 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4373 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4374 } 4375 } 4376 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4377 } 4378 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4379 if (restoreavr) { 4380 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4381 } 4382 /* need to correct the solution */ 4383 if (need_benign_correction) { 4384 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4385 PetscScalar *marr; 4386 4387 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4388 if (lda_rhs != n_R) { 4389 for (i=0;i<n_vertices;i++) { 4390 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4391 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4392 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4393 } 4394 } else { 4395 for (i=0;i<n_vertices;i++) { 4396 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4397 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4398 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4399 } 4400 } 4401 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4402 } 4403 } else { 4404 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4405 for (i=0;i<n_vertices;i++) { 4406 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4407 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4408 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4409 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4410 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4411 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4412 } 4413 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4414 } 4415 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4416 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4417 /* S_VV and S_CV */ 4418 if (n_constraints) { 4419 Mat B; 4420 4421 ierr = PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices);CHKERRQ(ierr); 4422 for (i=0;i<n_vertices;i++) { 4423 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4424 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4425 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4426 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4427 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4428 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4429 } 4430 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4431 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4432 ierr = MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV);CHKERRQ(ierr); 4433 ierr = MatProductSetType(S_CV,MATPRODUCT_AB);CHKERRQ(ierr); 4434 ierr = MatProductSetFromOptions(S_CV);CHKERRQ(ierr); 4435 ierr = MatProductSymbolic(S_CV);CHKERRQ(ierr); 4436 ierr = MatProductNumeric(S_CV);CHKERRQ(ierr); 4437 ierr = MatProductClear(S_CV);CHKERRQ(ierr); 4438 4439 ierr = MatDestroy(&B);CHKERRQ(ierr); 4440 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4441 /* Reuse B = local_auxmat2_R * S_CV */ 4442 ierr = MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B);CHKERRQ(ierr); 4443 ierr = MatProductSetType(B,MATPRODUCT_AB);CHKERRQ(ierr); 4444 ierr = MatProductSetFromOptions(B);CHKERRQ(ierr); 4445 ierr = MatProductSymbolic(B);CHKERRQ(ierr); 4446 ierr = MatProductNumeric(B);CHKERRQ(ierr); 4447 4448 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4449 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4450 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4451 ierr = MatDestroy(&B);CHKERRQ(ierr); 4452 } 4453 if (lda_rhs != n_R) { 4454 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4455 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4456 ierr = MatDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4457 } 4458 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4459 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4460 if (need_benign_correction) { 4461 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4462 PetscScalar *marr,*sums; 4463 4464 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4465 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4466 for (i=0;i<reuse_solver->benign_n;i++) { 4467 const PetscScalar *vals; 4468 const PetscInt *idxs,*idxs_zero; 4469 PetscInt n,j,nz; 4470 4471 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4472 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4473 for (j=0;j<n_vertices;j++) { 4474 PetscInt k; 4475 sums[j] = 0.; 4476 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4477 } 4478 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4479 for (j=0;j<n;j++) { 4480 PetscScalar val = vals[j]; 4481 PetscInt k; 4482 for (k=0;k<n_vertices;k++) { 4483 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4484 } 4485 } 4486 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4487 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4488 } 4489 ierr = PetscFree(sums);CHKERRQ(ierr); 4490 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4491 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4492 } 4493 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4494 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4495 ierr = MatDenseGetArrayRead(A_VV,&x);CHKERRQ(ierr); 4496 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4497 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4498 ierr = MatDenseRestoreArrayRead(A_VV,&x);CHKERRQ(ierr); 4499 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4500 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4501 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4502 } else { 4503 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4504 } 4505 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4506 4507 /* coarse basis functions */ 4508 for (i=0;i<n_vertices;i++) { 4509 PetscScalar *y; 4510 4511 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4512 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4513 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4514 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4515 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4516 y[n_B*i+idx_V_B[i]] = 1.0; 4517 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4518 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4519 4520 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4521 PetscInt j; 4522 4523 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4524 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4525 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4526 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4527 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4528 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4529 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4530 } 4531 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4532 } 4533 /* if n_R == 0 the object is not destroyed */ 4534 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4535 } 4536 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4537 4538 if (n_constraints) { 4539 Mat B; 4540 4541 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4542 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4543 ierr = MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B);CHKERRQ(ierr); 4544 ierr = MatProductSetType(B,MATPRODUCT_AB);CHKERRQ(ierr); 4545 ierr = MatProductSetFromOptions(B);CHKERRQ(ierr); 4546 ierr = MatProductSymbolic(B);CHKERRQ(ierr); 4547 ierr = MatProductNumeric(B);CHKERRQ(ierr); 4548 4549 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4550 if (n_vertices) { 4551 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4552 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4553 } else { 4554 Mat S_VCt; 4555 4556 if (lda_rhs != n_R) { 4557 ierr = MatDestroy(&B);CHKERRQ(ierr); 4558 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4559 ierr = MatDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4560 } 4561 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4562 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4563 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4564 } 4565 } 4566 ierr = MatDestroy(&B);CHKERRQ(ierr); 4567 /* coarse basis functions */ 4568 for (i=0;i<n_constraints;i++) { 4569 PetscScalar *y; 4570 4571 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4572 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4573 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4574 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4575 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4576 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4577 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4578 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4579 PetscInt j; 4580 4581 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4582 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4583 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4584 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4585 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4586 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4587 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4588 } 4589 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4590 } 4591 } 4592 if (n_constraints) { 4593 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4594 } 4595 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4596 4597 /* coarse matrix entries relative to B_0 */ 4598 if (pcbddc->benign_n) { 4599 Mat B0_B,B0_BPHI; 4600 IS is_dummy; 4601 const PetscScalar *data; 4602 PetscInt j; 4603 4604 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4605 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4606 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4607 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4608 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4609 ierr = MatDenseGetArrayRead(B0_BPHI,&data);CHKERRQ(ierr); 4610 for (j=0;j<pcbddc->benign_n;j++) { 4611 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4612 for (i=0;i<pcbddc->local_primal_size;i++) { 4613 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4614 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4615 } 4616 } 4617 ierr = MatDenseRestoreArrayRead(B0_BPHI,&data);CHKERRQ(ierr); 4618 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4619 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4620 } 4621 4622 /* compute other basis functions for non-symmetric problems */ 4623 if (!pcbddc->symmetric_primal) { 4624 Mat B_V=NULL,B_C=NULL; 4625 PetscScalar *marray; 4626 4627 if (n_constraints) { 4628 Mat S_CCT,C_CRT; 4629 4630 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4631 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4632 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4633 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4634 if (n_vertices) { 4635 Mat S_VCT; 4636 4637 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4638 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4639 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4640 } 4641 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4642 } else { 4643 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4644 } 4645 if (n_vertices && n_R) { 4646 PetscScalar *av,*marray; 4647 const PetscInt *xadj,*adjncy; 4648 PetscInt n; 4649 PetscBool flg_row; 4650 4651 /* B_V = B_V - A_VR^T */ 4652 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4653 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4654 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4655 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4656 for (i=0;i<n;i++) { 4657 PetscInt j; 4658 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4659 } 4660 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4661 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4662 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4663 } 4664 4665 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4666 if (n_vertices) { 4667 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4668 for (i=0;i<n_vertices;i++) { 4669 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4670 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4671 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4672 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4673 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4674 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4675 } 4676 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4677 } 4678 if (B_C) { 4679 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4680 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4681 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4682 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4683 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4684 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4685 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4686 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4687 } 4688 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4689 } 4690 /* coarse basis functions */ 4691 for (i=0;i<pcbddc->local_primal_size;i++) { 4692 PetscScalar *y; 4693 4694 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4695 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4696 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4697 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4698 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4699 if (i<n_vertices) { 4700 y[n_B*i+idx_V_B[i]] = 1.0; 4701 } 4702 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4703 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4704 4705 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4706 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4707 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4708 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4709 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4710 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4711 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4712 } 4713 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4714 } 4715 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4716 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4717 } 4718 4719 /* free memory */ 4720 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4721 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4722 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4723 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4724 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4725 ierr = PetscFree(work);CHKERRQ(ierr); 4726 if (n_vertices) { 4727 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4728 } 4729 if (n_constraints) { 4730 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4731 } 4732 ierr = PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 4733 4734 /* Checking coarse_sub_mat and coarse basis functios */ 4735 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4736 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4737 if (pcbddc->dbg_flag) { 4738 Mat coarse_sub_mat; 4739 Mat AUXMAT,TM1,TM2,TM3,TM4; 4740 Mat coarse_phi_D,coarse_phi_B; 4741 Mat coarse_psi_D,coarse_psi_B; 4742 Mat A_II,A_BB,A_IB,A_BI; 4743 Mat C_B,CPHI; 4744 IS is_dummy; 4745 Vec mones; 4746 MatType checkmattype=MATSEQAIJ; 4747 PetscReal real_value; 4748 4749 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4750 Mat A; 4751 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4752 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4753 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4754 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4755 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4756 ierr = MatDestroy(&A);CHKERRQ(ierr); 4757 } else { 4758 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4759 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4760 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4761 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4762 } 4763 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4764 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4765 if (!pcbddc->symmetric_primal) { 4766 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4767 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4768 } 4769 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4770 4771 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4772 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4773 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4774 if (!pcbddc->symmetric_primal) { 4775 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4776 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4777 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4778 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4779 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4780 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4781 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4782 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4783 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4784 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4785 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4786 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4787 } else { 4788 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4789 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4790 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4791 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4792 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4793 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4794 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4795 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4796 } 4797 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4798 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4799 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4800 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4801 if (pcbddc->benign_n) { 4802 Mat B0_B,B0_BPHI; 4803 const PetscScalar *data2; 4804 PetscScalar *data; 4805 PetscInt j; 4806 4807 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4808 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4809 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4810 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4811 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4812 ierr = MatDenseGetArrayRead(B0_BPHI,&data2);CHKERRQ(ierr); 4813 for (j=0;j<pcbddc->benign_n;j++) { 4814 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4815 for (i=0;i<pcbddc->local_primal_size;i++) { 4816 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4817 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4818 } 4819 } 4820 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4821 ierr = MatDenseRestoreArrayRead(B0_BPHI,&data2);CHKERRQ(ierr); 4822 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4823 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4824 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4825 } 4826 #if 0 4827 { 4828 PetscViewer viewer; 4829 char filename[256]; 4830 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4831 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4832 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4833 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4834 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4835 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4836 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4837 if (pcbddc->coarse_phi_B) { 4838 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4839 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4840 } 4841 if (pcbddc->coarse_phi_D) { 4842 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4843 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4844 } 4845 if (pcbddc->coarse_psi_B) { 4846 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4847 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4848 } 4849 if (pcbddc->coarse_psi_D) { 4850 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4851 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4852 } 4853 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4854 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4855 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4856 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4857 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4858 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4859 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4860 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4861 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4862 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4863 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4864 } 4865 #endif 4866 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4867 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4868 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4869 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4870 4871 /* check constraints */ 4872 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4873 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4874 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4875 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4876 } else { 4877 PetscScalar *data; 4878 Mat tmat; 4879 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4880 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4881 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4882 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4883 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4884 } 4885 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4886 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4887 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4888 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4889 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4890 if (!pcbddc->symmetric_primal) { 4891 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4892 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4893 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4894 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4895 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4896 } 4897 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4898 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4899 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4900 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4901 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4902 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4903 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4904 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4905 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4906 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4907 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4908 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4909 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4910 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4911 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4912 if (!pcbddc->symmetric_primal) { 4913 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4914 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4915 } 4916 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4917 } 4918 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4919 { 4920 PetscBool gpu; 4921 4922 ierr = PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu);CHKERRQ(ierr); 4923 if (gpu) { 4924 if (pcbddc->local_auxmat1) { 4925 ierr = MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4926 } 4927 if (pcbddc->local_auxmat2) { 4928 ierr = MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4929 } 4930 if (pcbddc->coarse_phi_B) { 4931 ierr = MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4932 } 4933 if (pcbddc->coarse_phi_D) { 4934 ierr = MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4935 } 4936 if (pcbddc->coarse_psi_B) { 4937 ierr = MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4938 } 4939 if (pcbddc->coarse_psi_D) { 4940 ierr = MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4941 } 4942 } 4943 } 4944 /* get back data */ 4945 *coarse_submat_vals_n = coarse_submat_vals; 4946 PetscFunctionReturn(0); 4947 } 4948 4949 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4950 { 4951 Mat *work_mat; 4952 IS isrow_s,iscol_s; 4953 PetscBool rsorted,csorted; 4954 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4955 PetscErrorCode ierr; 4956 4957 PetscFunctionBegin; 4958 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4959 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4960 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4961 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4962 4963 if (!rsorted) { 4964 const PetscInt *idxs; 4965 PetscInt *idxs_sorted,i; 4966 4967 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4968 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4969 for (i=0;i<rsize;i++) { 4970 idxs_perm_r[i] = i; 4971 } 4972 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4973 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4974 for (i=0;i<rsize;i++) { 4975 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4976 } 4977 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4978 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4979 } else { 4980 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4981 isrow_s = isrow; 4982 } 4983 4984 if (!csorted) { 4985 if (isrow == iscol) { 4986 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4987 iscol_s = isrow_s; 4988 } else { 4989 const PetscInt *idxs; 4990 PetscInt *idxs_sorted,i; 4991 4992 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4993 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4994 for (i=0;i<csize;i++) { 4995 idxs_perm_c[i] = i; 4996 } 4997 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4998 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4999 for (i=0;i<csize;i++) { 5000 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 5001 } 5002 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 5003 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 5004 } 5005 } else { 5006 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 5007 iscol_s = iscol; 5008 } 5009 5010 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5011 5012 if (!rsorted || !csorted) { 5013 Mat new_mat; 5014 IS is_perm_r,is_perm_c; 5015 5016 if (!rsorted) { 5017 PetscInt *idxs_r,i; 5018 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 5019 for (i=0;i<rsize;i++) { 5020 idxs_r[idxs_perm_r[i]] = i; 5021 } 5022 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 5023 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 5024 } else { 5025 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 5026 } 5027 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 5028 5029 if (!csorted) { 5030 if (isrow_s == iscol_s) { 5031 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 5032 is_perm_c = is_perm_r; 5033 } else { 5034 PetscInt *idxs_c,i; 5035 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 5036 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 5037 for (i=0;i<csize;i++) { 5038 idxs_c[idxs_perm_c[i]] = i; 5039 } 5040 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 5041 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 5042 } 5043 } else { 5044 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 5045 } 5046 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 5047 5048 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 5049 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 5050 work_mat[0] = new_mat; 5051 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 5052 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 5053 } 5054 5055 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 5056 *B = work_mat[0]; 5057 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 5058 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 5059 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 5060 PetscFunctionReturn(0); 5061 } 5062 5063 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5064 { 5065 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5066 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5067 Mat new_mat,lA; 5068 IS is_local,is_global; 5069 PetscInt local_size; 5070 PetscBool isseqaij; 5071 PetscErrorCode ierr; 5072 5073 PetscFunctionBegin; 5074 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5075 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 5076 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 5077 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 5078 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 5079 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 5080 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5081 5082 if (pcbddc->dbg_flag) { 5083 Vec x,x_change; 5084 PetscReal error; 5085 5086 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 5087 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5088 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 5089 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5090 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5091 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 5092 if (!pcbddc->change_interior) { 5093 const PetscScalar *x,*y,*v; 5094 PetscReal lerror = 0.; 5095 PetscInt i; 5096 5097 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 5098 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 5099 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 5100 for (i=0;i<local_size;i++) 5101 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 5102 lerror = PetscAbsScalar(x[i]-y[i]); 5103 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 5104 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 5105 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 5106 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5107 if (error > PETSC_SMALL) { 5108 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5109 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error); 5110 } else { 5111 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error); 5112 } 5113 } 5114 } 5115 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5116 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5117 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5118 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5119 if (error > PETSC_SMALL) { 5120 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5121 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 5122 } else { 5123 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error); 5124 } 5125 } 5126 ierr = VecDestroy(&x);CHKERRQ(ierr); 5127 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5128 } 5129 5130 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5131 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 5132 5133 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5134 ierr = PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5135 if (isseqaij) { 5136 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5137 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 5138 if (lA) { 5139 Mat work; 5140 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 5141 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 5142 ierr = MatDestroy(&work);CHKERRQ(ierr); 5143 } 5144 } else { 5145 Mat work_mat; 5146 5147 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5148 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5149 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 5150 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 5151 if (lA) { 5152 Mat work; 5153 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5154 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 5155 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 5156 ierr = MatDestroy(&work);CHKERRQ(ierr); 5157 } 5158 } 5159 if (matis->A->symmetric_set) { 5160 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 5161 #if !defined(PETSC_USE_COMPLEX) 5162 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 5163 #endif 5164 } 5165 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 5166 PetscFunctionReturn(0); 5167 } 5168 5169 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5170 { 5171 PC_IS* pcis = (PC_IS*)(pc->data); 5172 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5173 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5174 PetscInt *idx_R_local=NULL; 5175 PetscInt n_vertices,i,j,n_R,n_D,n_B; 5176 PetscInt vbs,bs; 5177 PetscBT bitmask=NULL; 5178 PetscErrorCode ierr; 5179 5180 PetscFunctionBegin; 5181 /* 5182 No need to setup local scatters if 5183 - primal space is unchanged 5184 AND 5185 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5186 AND 5187 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5188 */ 5189 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5190 PetscFunctionReturn(0); 5191 } 5192 /* destroy old objects */ 5193 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 5194 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 5195 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 5196 /* Set Non-overlapping dimensions */ 5197 n_B = pcis->n_B; 5198 n_D = pcis->n - n_B; 5199 n_vertices = pcbddc->n_vertices; 5200 5201 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5202 5203 /* create auxiliary bitmask and allocate workspace */ 5204 if (!sub_schurs || !sub_schurs->reuse_solver) { 5205 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 5206 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 5207 for (i=0;i<n_vertices;i++) { 5208 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 5209 } 5210 5211 for (i=0, n_R=0; i<pcis->n; i++) { 5212 if (!PetscBTLookup(bitmask,i)) { 5213 idx_R_local[n_R++] = i; 5214 } 5215 } 5216 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5217 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5218 5219 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5220 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 5221 } 5222 5223 /* Block code */ 5224 vbs = 1; 5225 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 5226 if (bs>1 && !(n_vertices%bs)) { 5227 PetscBool is_blocked = PETSC_TRUE; 5228 PetscInt *vary; 5229 if (!sub_schurs || !sub_schurs->reuse_solver) { 5230 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 5231 ierr = PetscArrayzero(vary,pcis->n/bs);CHKERRQ(ierr); 5232 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5233 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5234 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5235 for (i=0; i<pcis->n/bs; i++) { 5236 if (vary[i]!=0 && vary[i]!=bs) { 5237 is_blocked = PETSC_FALSE; 5238 break; 5239 } 5240 } 5241 ierr = PetscFree(vary);CHKERRQ(ierr); 5242 } else { 5243 /* Verify directly the R set */ 5244 for (i=0; i<n_R/bs; i++) { 5245 PetscInt j,node=idx_R_local[bs*i]; 5246 for (j=1; j<bs; j++) { 5247 if (node != idx_R_local[bs*i+j]-j) { 5248 is_blocked = PETSC_FALSE; 5249 break; 5250 } 5251 } 5252 } 5253 } 5254 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5255 vbs = bs; 5256 for (i=0;i<n_R/vbs;i++) { 5257 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5258 } 5259 } 5260 } 5261 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 5262 if (sub_schurs && sub_schurs->reuse_solver) { 5263 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5264 5265 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5266 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 5267 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 5268 reuse_solver->is_R = pcbddc->is_R_local; 5269 } else { 5270 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 5271 } 5272 5273 /* print some info if requested */ 5274 if (pcbddc->dbg_flag) { 5275 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5276 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5277 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5278 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 5279 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 5280 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 5281 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5282 } 5283 5284 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5285 if (!sub_schurs || !sub_schurs->reuse_solver) { 5286 IS is_aux1,is_aux2; 5287 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5288 5289 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5290 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 5291 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 5292 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5293 for (i=0; i<n_D; i++) { 5294 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 5295 } 5296 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5297 for (i=0, j=0; i<n_R; i++) { 5298 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5299 aux_array1[j++] = i; 5300 } 5301 } 5302 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5303 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5304 for (i=0, j=0; i<n_B; i++) { 5305 if (!PetscBTLookup(bitmask,is_indices[i])) { 5306 aux_array2[j++] = i; 5307 } 5308 } 5309 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5310 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 5311 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 5312 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5313 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 5314 5315 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5316 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 5317 for (i=0, j=0; i<n_R; i++) { 5318 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5319 aux_array1[j++] = i; 5320 } 5321 } 5322 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5323 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5324 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5325 } 5326 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 5327 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5328 } else { 5329 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5330 IS tis; 5331 PetscInt schur_size; 5332 5333 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 5334 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 5335 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 5336 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5337 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5338 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 5339 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5340 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5341 } 5342 } 5343 PetscFunctionReturn(0); 5344 } 5345 5346 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5347 { 5348 MatNullSpace NullSpace; 5349 Mat dmat; 5350 const Vec *nullvecs; 5351 Vec v,v2,*nullvecs2; 5352 VecScatter sct = NULL; 5353 PetscContainer c; 5354 PetscScalar *ddata; 5355 PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs; 5356 PetscBool nnsp_has_cnst; 5357 PetscErrorCode ierr; 5358 5359 PetscFunctionBegin; 5360 if (!is && !B) { /* MATIS */ 5361 Mat_IS* matis = (Mat_IS*)A->data; 5362 5363 if (!B) { 5364 ierr = MatISGetLocalMat(A,&B);CHKERRQ(ierr); 5365 } 5366 sct = matis->cctx; 5367 ierr = PetscObjectReference((PetscObject)sct);CHKERRQ(ierr); 5368 } else { 5369 ierr = MatGetNullSpace(B,&NullSpace);CHKERRQ(ierr); 5370 if (!NullSpace) { 5371 ierr = MatGetNearNullSpace(B,&NullSpace);CHKERRQ(ierr); 5372 } 5373 if (NullSpace) PetscFunctionReturn(0); 5374 } 5375 ierr = MatGetNullSpace(A,&NullSpace);CHKERRQ(ierr); 5376 if (!NullSpace) { 5377 ierr = MatGetNearNullSpace(A,&NullSpace);CHKERRQ(ierr); 5378 } 5379 if (!NullSpace) PetscFunctionReturn(0); 5380 5381 ierr = MatCreateVecs(A,&v,NULL);CHKERRQ(ierr); 5382 ierr = MatCreateVecs(B,&v2,NULL);CHKERRQ(ierr); 5383 if (!sct) { 5384 ierr = VecScatterCreate(v,is,v2,NULL,&sct);CHKERRQ(ierr); 5385 } 5386 ierr = MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs);CHKERRQ(ierr); 5387 bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst; 5388 ierr = PetscMalloc1(bsiz,&nullvecs2);CHKERRQ(ierr); 5389 ierr = VecGetBlockSize(v2,&bs);CHKERRQ(ierr); 5390 ierr = VecGetSize(v2,&N);CHKERRQ(ierr); 5391 ierr = VecGetLocalSize(v2,&n);CHKERRQ(ierr); 5392 ierr = PetscMalloc1(n*bsiz,&ddata);CHKERRQ(ierr); 5393 for (k=0;k<nnsp_size;k++) { 5394 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k]);CHKERRQ(ierr); 5395 ierr = VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5396 ierr = VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5397 } 5398 if (nnsp_has_cnst) { 5399 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size]);CHKERRQ(ierr); 5400 ierr = VecSet(nullvecs2[nnsp_size],1.0);CHKERRQ(ierr); 5401 } 5402 ierr = PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2);CHKERRQ(ierr); 5403 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace);CHKERRQ(ierr); 5404 5405 ierr = MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat);CHKERRQ(ierr); 5406 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)B),&c);CHKERRQ(ierr); 5407 ierr = PetscContainerSetPointer(c,ddata);CHKERRQ(ierr); 5408 ierr = PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault);CHKERRQ(ierr); 5409 ierr = PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c);CHKERRQ(ierr); 5410 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 5411 ierr = PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat);CHKERRQ(ierr); 5412 ierr = MatDestroy(&dmat);CHKERRQ(ierr); 5413 5414 for (k=0;k<bsiz;k++) { 5415 ierr = VecDestroy(&nullvecs2[k]);CHKERRQ(ierr); 5416 } 5417 ierr = PetscFree(nullvecs2);CHKERRQ(ierr); 5418 ierr = MatSetNearNullSpace(B,NullSpace);CHKERRQ(ierr); 5419 ierr = MatNullSpaceDestroy(&NullSpace);CHKERRQ(ierr); 5420 ierr = VecDestroy(&v);CHKERRQ(ierr); 5421 ierr = VecDestroy(&v2);CHKERRQ(ierr); 5422 ierr = VecScatterDestroy(&sct);CHKERRQ(ierr); 5423 PetscFunctionReturn(0); 5424 } 5425 5426 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5427 { 5428 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5429 PC_IS *pcis = (PC_IS*)pc->data; 5430 PC pc_temp; 5431 Mat A_RR; 5432 MatNullSpace nnsp; 5433 MatReuse reuse; 5434 PetscScalar m_one = -1.0; 5435 PetscReal value; 5436 PetscInt n_D,n_R; 5437 PetscBool issbaij,opts; 5438 PetscErrorCode ierr; 5439 void (*f)(void) = NULL; 5440 char dir_prefix[256],neu_prefix[256],str_level[16]; 5441 size_t len; 5442 5443 PetscFunctionBegin; 5444 ierr = PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5445 /* approximate solver, propagate NearNullSpace if needed */ 5446 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5447 MatNullSpace gnnsp1,gnnsp2; 5448 PetscBool lhas,ghas; 5449 5450 ierr = MatGetNearNullSpace(pcbddc->local_mat,&nnsp);CHKERRQ(ierr); 5451 ierr = MatGetNearNullSpace(pc->pmat,&gnnsp1);CHKERRQ(ierr); 5452 ierr = MatGetNullSpace(pc->pmat,&gnnsp2);CHKERRQ(ierr); 5453 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5454 ierr = MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5455 if (!ghas && (gnnsp1 || gnnsp2)) { 5456 ierr = MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL);CHKERRQ(ierr); 5457 } 5458 } 5459 5460 /* compute prefixes */ 5461 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 5462 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 5463 if (!pcbddc->current_level) { 5464 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));CHKERRQ(ierr); 5465 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));CHKERRQ(ierr); 5466 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5467 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5468 } else { 5469 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 5470 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5471 len -= 15; /* remove "pc_bddc_coarse_" */ 5472 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5473 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5474 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5475 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5476 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5477 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5478 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5479 ierr = PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));CHKERRQ(ierr); 5480 ierr = PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));CHKERRQ(ierr); 5481 } 5482 5483 /* DIRICHLET PROBLEM */ 5484 if (dirichlet) { 5485 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5486 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5487 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 5488 if (pcbddc->dbg_flag) { 5489 Mat A_IIn; 5490 5491 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5492 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5493 pcis->A_II = A_IIn; 5494 } 5495 } 5496 if (pcbddc->local_mat->symmetric_set) { 5497 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);CHKERRQ(ierr); 5498 } 5499 /* Matrix for Dirichlet problem is pcis->A_II */ 5500 n_D = pcis->n - pcis->n_B; 5501 opts = PETSC_FALSE; 5502 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5503 opts = PETSC_TRUE; 5504 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5505 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5506 /* default */ 5507 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5508 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5509 ierr = PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5510 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5511 if (issbaij) { 5512 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5513 } else { 5514 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5515 } 5516 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);CHKERRQ(ierr); 5517 } 5518 ierr = MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix);CHKERRQ(ierr); 5519 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II);CHKERRQ(ierr); 5520 /* Allow user's customization */ 5521 if (opts) { 5522 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5523 } 5524 ierr = MatGetNearNullSpace(pcis->pA_II,&nnsp);CHKERRQ(ierr); 5525 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5526 ierr = MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II);CHKERRQ(ierr); 5527 } 5528 ierr = MatGetNearNullSpace(pcis->pA_II,&nnsp);CHKERRQ(ierr); 5529 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5530 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5531 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5532 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5533 const PetscInt *idxs; 5534 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5535 5536 ierr = ISGetLocalSize(pcis->is_I_local,&nl);CHKERRQ(ierr); 5537 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5538 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5539 for (i=0;i<nl;i++) { 5540 for (d=0;d<cdim;d++) { 5541 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5542 } 5543 } 5544 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5545 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5546 ierr = PetscFree(scoords);CHKERRQ(ierr); 5547 } 5548 if (sub_schurs && sub_schurs->reuse_solver) { 5549 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5550 5551 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5552 } 5553 5554 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5555 if (!n_D) { 5556 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5557 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5558 } 5559 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 5560 /* set ksp_D into pcis data */ 5561 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5562 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5563 pcis->ksp_D = pcbddc->ksp_D; 5564 } 5565 5566 /* NEUMANN PROBLEM */ 5567 A_RR = NULL; 5568 if (neumann) { 5569 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5570 PetscInt ibs,mbs; 5571 PetscBool issbaij, reuse_neumann_solver; 5572 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5573 5574 reuse_neumann_solver = PETSC_FALSE; 5575 if (sub_schurs && sub_schurs->reuse_solver) { 5576 IS iP; 5577 5578 reuse_neumann_solver = PETSC_TRUE; 5579 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5580 if (iP) reuse_neumann_solver = PETSC_FALSE; 5581 } 5582 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5583 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5584 if (pcbddc->ksp_R) { /* already created ksp */ 5585 PetscInt nn_R; 5586 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5587 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5588 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5589 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5590 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5591 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5592 reuse = MAT_INITIAL_MATRIX; 5593 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5594 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5595 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5596 reuse = MAT_INITIAL_MATRIX; 5597 } else { /* safe to reuse the matrix */ 5598 reuse = MAT_REUSE_MATRIX; 5599 } 5600 } 5601 /* last check */ 5602 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5603 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5604 reuse = MAT_INITIAL_MATRIX; 5605 } 5606 } else { /* first time, so we need to create the matrix */ 5607 reuse = MAT_INITIAL_MATRIX; 5608 } 5609 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5610 TODO: Get Rid of these conversions */ 5611 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5612 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5613 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5614 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5615 if (matis->A == pcbddc->local_mat) { 5616 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5617 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5618 } else { 5619 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5620 } 5621 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5622 if (matis->A == pcbddc->local_mat) { 5623 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5624 ierr = MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5625 } else { 5626 ierr = MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5627 } 5628 } 5629 /* extract A_RR */ 5630 if (reuse_neumann_solver) { 5631 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5632 5633 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5634 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5635 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5636 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5637 } else { 5638 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5639 } 5640 } else { 5641 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5642 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5643 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5644 } 5645 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5646 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5647 } 5648 if (pcbddc->local_mat->symmetric_set) { 5649 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);CHKERRQ(ierr); 5650 } 5651 opts = PETSC_FALSE; 5652 if (!pcbddc->ksp_R) { /* create object if not present */ 5653 opts = PETSC_TRUE; 5654 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5655 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5656 /* default */ 5657 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5658 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5659 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5660 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5661 if (issbaij) { 5662 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5663 } else { 5664 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5665 } 5666 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);CHKERRQ(ierr); 5667 } 5668 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5669 ierr = MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);CHKERRQ(ierr); 5670 if (opts) { /* Allow user's customization once */ 5671 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5672 } 5673 ierr = MatGetNearNullSpace(A_RR,&nnsp);CHKERRQ(ierr); 5674 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5675 ierr = MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR);CHKERRQ(ierr); 5676 } 5677 ierr = MatGetNearNullSpace(A_RR,&nnsp);CHKERRQ(ierr); 5678 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5679 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5680 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5681 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5682 const PetscInt *idxs; 5683 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5684 5685 ierr = ISGetLocalSize(pcbddc->is_R_local,&nl);CHKERRQ(ierr); 5686 ierr = ISGetIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5687 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5688 for (i=0;i<nl;i++) { 5689 for (d=0;d<cdim;d++) { 5690 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5691 } 5692 } 5693 ierr = ISRestoreIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5694 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5695 ierr = PetscFree(scoords);CHKERRQ(ierr); 5696 } 5697 5698 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5699 if (!n_R) { 5700 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5701 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5702 } 5703 /* Reuse solver if it is present */ 5704 if (reuse_neumann_solver) { 5705 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5706 5707 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5708 } 5709 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 5710 } 5711 5712 if (pcbddc->dbg_flag) { 5713 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5714 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5715 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5716 } 5717 ierr = PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5718 5719 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5720 if (pcbddc->NullSpace_corr[0]) { 5721 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5722 } 5723 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5724 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5725 } 5726 if (neumann && pcbddc->NullSpace_corr[2]) { 5727 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5728 } 5729 /* check Dirichlet and Neumann solvers */ 5730 if (pcbddc->dbg_flag) { 5731 if (dirichlet) { /* Dirichlet */ 5732 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5733 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5734 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5735 ierr = KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);CHKERRQ(ierr); 5736 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5737 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5738 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5739 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5740 } 5741 if (neumann) { /* Neumann */ 5742 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5743 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5744 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5745 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 5746 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5747 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5748 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5749 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5750 } 5751 } 5752 /* free Neumann problem's matrix */ 5753 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5754 PetscFunctionReturn(0); 5755 } 5756 5757 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5758 { 5759 PetscErrorCode ierr; 5760 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5761 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5762 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 5763 5764 PetscFunctionBegin; 5765 if (!reuse_solver) { 5766 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5767 } 5768 if (!pcbddc->switch_static) { 5769 if (applytranspose && pcbddc->local_auxmat1) { 5770 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5771 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5772 } 5773 if (!reuse_solver) { 5774 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5775 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5776 } else { 5777 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5778 5779 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5780 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5781 } 5782 } else { 5783 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5784 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5785 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5786 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5787 if (applytranspose && pcbddc->local_auxmat1) { 5788 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5789 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5790 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5791 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5792 } 5793 } 5794 if (!reuse_solver || pcbddc->switch_static) { 5795 if (applytranspose) { 5796 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5797 } else { 5798 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5799 } 5800 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R);CHKERRQ(ierr); 5801 } else { 5802 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5803 5804 if (applytranspose) { 5805 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5806 } else { 5807 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5808 } 5809 } 5810 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5811 if (!pcbddc->switch_static) { 5812 if (!reuse_solver) { 5813 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5814 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5815 } else { 5816 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5817 5818 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5819 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5820 } 5821 if (!applytranspose && pcbddc->local_auxmat1) { 5822 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5823 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5824 } 5825 } else { 5826 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5827 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5828 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5829 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5830 if (!applytranspose && pcbddc->local_auxmat1) { 5831 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5832 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5833 } 5834 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5835 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5836 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5837 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5838 } 5839 PetscFunctionReturn(0); 5840 } 5841 5842 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5843 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5844 { 5845 PetscErrorCode ierr; 5846 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5847 PC_IS* pcis = (PC_IS*) (pc->data); 5848 const PetscScalar zero = 0.0; 5849 5850 PetscFunctionBegin; 5851 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5852 if (!pcbddc->benign_apply_coarse_only) { 5853 if (applytranspose) { 5854 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5855 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5856 } else { 5857 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5858 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5859 } 5860 } else { 5861 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5862 } 5863 5864 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5865 if (pcbddc->benign_n) { 5866 PetscScalar *array; 5867 PetscInt j; 5868 5869 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5870 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5871 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5872 } 5873 5874 /* start communications from local primal nodes to rhs of coarse solver */ 5875 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5876 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5877 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5878 5879 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5880 if (pcbddc->coarse_ksp) { 5881 Mat coarse_mat; 5882 Vec rhs,sol; 5883 MatNullSpace nullsp; 5884 PetscBool isbddc = PETSC_FALSE; 5885 5886 if (pcbddc->benign_have_null) { 5887 PC coarse_pc; 5888 5889 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5890 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5891 /* we need to propagate to coarser levels the need for a possible benign correction */ 5892 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5893 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5894 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5895 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5896 } 5897 } 5898 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5899 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5900 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5901 if (applytranspose) { 5902 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5903 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5904 ierr = KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);CHKERRQ(ierr); 5905 ierr = MatGetTransposeNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5906 if (nullsp) { 5907 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5908 } 5909 } else { 5910 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5911 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5912 PC coarse_pc; 5913 5914 if (nullsp) { 5915 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5916 } 5917 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5918 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5919 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5920 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5921 } else { 5922 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5923 ierr = KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);CHKERRQ(ierr); 5924 if (nullsp) { 5925 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5926 } 5927 } 5928 } 5929 /* we don't need the benign correction at coarser levels anymore */ 5930 if (pcbddc->benign_have_null && isbddc) { 5931 PC coarse_pc; 5932 PC_BDDC* coarsepcbddc; 5933 5934 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5935 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5936 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5937 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5938 } 5939 } 5940 5941 /* Local solution on R nodes */ 5942 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5943 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5944 } 5945 /* communications from coarse sol to local primal nodes */ 5946 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5947 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5948 5949 /* Sum contributions from the two levels */ 5950 if (!pcbddc->benign_apply_coarse_only) { 5951 if (applytranspose) { 5952 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5953 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5954 } else { 5955 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5956 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5957 } 5958 /* store p0 */ 5959 if (pcbddc->benign_n) { 5960 PetscScalar *array; 5961 PetscInt j; 5962 5963 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5964 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5965 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5966 } 5967 } else { /* expand the coarse solution */ 5968 if (applytranspose) { 5969 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5970 } else { 5971 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5972 } 5973 } 5974 PetscFunctionReturn(0); 5975 } 5976 5977 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5978 { 5979 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5980 Vec from,to; 5981 const PetscScalar *array; 5982 PetscErrorCode ierr; 5983 5984 PetscFunctionBegin; 5985 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5986 from = pcbddc->coarse_vec; 5987 to = pcbddc->vec1_P; 5988 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5989 Vec tvec; 5990 5991 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5992 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5993 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5994 ierr = VecGetArrayRead(tvec,&array);CHKERRQ(ierr); 5995 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5996 ierr = VecRestoreArrayRead(tvec,&array);CHKERRQ(ierr); 5997 } 5998 } else { /* from local to global -> put data in coarse right hand side */ 5999 from = pcbddc->vec1_P; 6000 to = pcbddc->coarse_vec; 6001 } 6002 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 6003 PetscFunctionReturn(0); 6004 } 6005 6006 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 6007 { 6008 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 6009 Vec from,to; 6010 const PetscScalar *array; 6011 PetscErrorCode ierr; 6012 6013 PetscFunctionBegin; 6014 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6015 from = pcbddc->coarse_vec; 6016 to = pcbddc->vec1_P; 6017 } else { /* from local to global -> put data in coarse right hand side */ 6018 from = pcbddc->vec1_P; 6019 to = pcbddc->coarse_vec; 6020 } 6021 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 6022 if (smode == SCATTER_FORWARD) { 6023 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6024 Vec tvec; 6025 6026 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 6027 ierr = VecGetArrayRead(to,&array);CHKERRQ(ierr); 6028 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 6029 ierr = VecRestoreArrayRead(to,&array);CHKERRQ(ierr); 6030 } 6031 } else { 6032 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6033 ierr = VecResetArray(from);CHKERRQ(ierr); 6034 } 6035 } 6036 PetscFunctionReturn(0); 6037 } 6038 6039 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6040 { 6041 PetscErrorCode ierr; 6042 PC_IS* pcis = (PC_IS*)(pc->data); 6043 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6044 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6045 /* one and zero */ 6046 PetscScalar one=1.0,zero=0.0; 6047 /* space to store constraints and their local indices */ 6048 PetscScalar *constraints_data; 6049 PetscInt *constraints_idxs,*constraints_idxs_B; 6050 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 6051 PetscInt *constraints_n; 6052 /* iterators */ 6053 PetscInt i,j,k,total_counts,total_counts_cc,cum; 6054 /* BLAS integers */ 6055 PetscBLASInt lwork,lierr; 6056 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 6057 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 6058 /* reuse */ 6059 PetscInt olocal_primal_size,olocal_primal_size_cc; 6060 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 6061 /* change of basis */ 6062 PetscBool qr_needed; 6063 PetscBT change_basis,qr_needed_idx; 6064 /* auxiliary stuff */ 6065 PetscInt *nnz,*is_indices; 6066 PetscInt ncc; 6067 /* some quantities */ 6068 PetscInt n_vertices,total_primal_vertices,valid_constraints; 6069 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 6070 PetscReal tol; /* tolerance for retaining eigenmodes */ 6071 6072 PetscFunctionBegin; 6073 tol = PetscSqrtReal(PETSC_SMALL); 6074 /* Destroy Mat objects computed previously */ 6075 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6076 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6077 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 6078 /* save info on constraints from previous setup (if any) */ 6079 olocal_primal_size = pcbddc->local_primal_size; 6080 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6081 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 6082 ierr = PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc);CHKERRQ(ierr); 6083 ierr = PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc);CHKERRQ(ierr); 6084 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6085 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6086 6087 if (!pcbddc->adaptive_selection) { 6088 IS ISForVertices,*ISForFaces,*ISForEdges; 6089 MatNullSpace nearnullsp; 6090 const Vec *nearnullvecs; 6091 Vec *localnearnullsp; 6092 PetscScalar *array; 6093 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 6094 PetscBool nnsp_has_cnst; 6095 /* LAPACK working arrays for SVD or POD */ 6096 PetscBool skip_lapack,boolforchange; 6097 PetscScalar *work; 6098 PetscReal *singular_vals; 6099 #if defined(PETSC_USE_COMPLEX) 6100 PetscReal *rwork; 6101 #endif 6102 PetscScalar *temp_basis = NULL,*correlation_mat = NULL; 6103 PetscBLASInt dummy_int=1; 6104 PetscScalar dummy_scalar=1.; 6105 PetscBool use_pod = PETSC_FALSE; 6106 6107 /* MKL SVD with same input gives different results on different processes! */ 6108 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL) 6109 use_pod = PETSC_TRUE; 6110 #endif 6111 /* Get index sets for faces, edges and vertices from graph */ 6112 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 6113 /* print some info */ 6114 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6115 PetscInt nv; 6116 6117 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6118 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 6119 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6120 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6121 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6122 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 6123 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 6124 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6125 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6126 } 6127 6128 /* free unneeded index sets */ 6129 if (!pcbddc->use_vertices) { 6130 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6131 } 6132 if (!pcbddc->use_edges) { 6133 for (i=0;i<n_ISForEdges;i++) { 6134 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6135 } 6136 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6137 n_ISForEdges = 0; 6138 } 6139 if (!pcbddc->use_faces) { 6140 for (i=0;i<n_ISForFaces;i++) { 6141 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6142 } 6143 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6144 n_ISForFaces = 0; 6145 } 6146 6147 /* check if near null space is attached to global mat */ 6148 if (pcbddc->use_nnsp) { 6149 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 6150 } else nearnullsp = NULL; 6151 6152 if (nearnullsp) { 6153 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 6154 /* remove any stored info */ 6155 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 6156 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 6157 /* store information for BDDC solver reuse */ 6158 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 6159 pcbddc->onearnullspace = nearnullsp; 6160 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 6161 for (i=0;i<nnsp_size;i++) { 6162 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 6163 } 6164 } else { /* if near null space is not provided BDDC uses constants by default */ 6165 nnsp_size = 0; 6166 nnsp_has_cnst = PETSC_TRUE; 6167 } 6168 /* get max number of constraints on a single cc */ 6169 max_constraints = nnsp_size; 6170 if (nnsp_has_cnst) max_constraints++; 6171 6172 /* 6173 Evaluate maximum storage size needed by the procedure 6174 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6175 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6176 There can be multiple constraints per connected component 6177 */ 6178 n_vertices = 0; 6179 if (ISForVertices) { 6180 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 6181 } 6182 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 6183 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 6184 6185 total_counts = n_ISForFaces+n_ISForEdges; 6186 total_counts *= max_constraints; 6187 total_counts += n_vertices; 6188 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 6189 6190 total_counts = 0; 6191 max_size_of_constraint = 0; 6192 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 6193 IS used_is; 6194 if (i<n_ISForEdges) { 6195 used_is = ISForEdges[i]; 6196 } else { 6197 used_is = ISForFaces[i-n_ISForEdges]; 6198 } 6199 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 6200 total_counts += j; 6201 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 6202 } 6203 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 6204 6205 /* get local part of global near null space vectors */ 6206 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 6207 for (k=0;k<nnsp_size;k++) { 6208 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 6209 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6210 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6211 } 6212 6213 /* whether or not to skip lapack calls */ 6214 skip_lapack = PETSC_TRUE; 6215 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6216 6217 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6218 if (!skip_lapack) { 6219 PetscScalar temp_work; 6220 6221 if (use_pod) { 6222 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6223 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 6224 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 6225 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 6226 #if defined(PETSC_USE_COMPLEX) 6227 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 6228 #endif 6229 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6230 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6231 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 6232 lwork = -1; 6233 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6234 #if !defined(PETSC_USE_COMPLEX) 6235 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 6236 #else 6237 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 6238 #endif 6239 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6240 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 6241 } else { 6242 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6243 /* SVD */ 6244 PetscInt max_n,min_n; 6245 max_n = max_size_of_constraint; 6246 min_n = max_constraints; 6247 if (max_size_of_constraint < max_constraints) { 6248 min_n = max_size_of_constraint; 6249 max_n = max_constraints; 6250 } 6251 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 6252 #if defined(PETSC_USE_COMPLEX) 6253 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 6254 #endif 6255 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6256 lwork = -1; 6257 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 6258 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 6259 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 6260 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6261 #if !defined(PETSC_USE_COMPLEX) 6262 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 6263 #else 6264 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 6265 #endif 6266 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6267 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 6268 #else 6269 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6270 #endif /* on missing GESVD */ 6271 } 6272 /* Allocate optimal workspace */ 6273 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 6274 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 6275 } 6276 /* Now we can loop on constraining sets */ 6277 total_counts = 0; 6278 constraints_idxs_ptr[0] = 0; 6279 constraints_data_ptr[0] = 0; 6280 /* vertices */ 6281 if (n_vertices) { 6282 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6283 ierr = PetscArraycpy(constraints_idxs,is_indices,n_vertices);CHKERRQ(ierr); 6284 for (i=0;i<n_vertices;i++) { 6285 constraints_n[total_counts] = 1; 6286 constraints_data[total_counts] = 1.0; 6287 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 6288 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 6289 total_counts++; 6290 } 6291 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6292 n_vertices = total_counts; 6293 } 6294 6295 /* edges and faces */ 6296 total_counts_cc = total_counts; 6297 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6298 IS used_is; 6299 PetscBool idxs_copied = PETSC_FALSE; 6300 6301 if (ncc<n_ISForEdges) { 6302 used_is = ISForEdges[ncc]; 6303 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6304 } else { 6305 used_is = ISForFaces[ncc-n_ISForEdges]; 6306 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6307 } 6308 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6309 6310 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 6311 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6312 /* change of basis should not be performed on local periodic nodes */ 6313 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6314 if (nnsp_has_cnst) { 6315 PetscScalar quad_value; 6316 6317 ierr = PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);CHKERRQ(ierr); 6318 idxs_copied = PETSC_TRUE; 6319 6320 if (!pcbddc->use_nnsp_true) { 6321 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6322 } else { 6323 quad_value = 1.0; 6324 } 6325 for (j=0;j<size_of_constraint;j++) { 6326 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6327 } 6328 temp_constraints++; 6329 total_counts++; 6330 } 6331 for (k=0;k<nnsp_size;k++) { 6332 PetscReal real_value; 6333 PetscScalar *ptr_to_data; 6334 6335 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6336 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6337 for (j=0;j<size_of_constraint;j++) { 6338 ptr_to_data[j] = array[is_indices[j]]; 6339 } 6340 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6341 /* check if array is null on the connected component */ 6342 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6343 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6344 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6345 temp_constraints++; 6346 total_counts++; 6347 if (!idxs_copied) { 6348 ierr = PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);CHKERRQ(ierr); 6349 idxs_copied = PETSC_TRUE; 6350 } 6351 } 6352 } 6353 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6354 valid_constraints = temp_constraints; 6355 if (!pcbddc->use_nnsp_true && temp_constraints) { 6356 if (temp_constraints == 1) { /* just normalize the constraint */ 6357 PetscScalar norm,*ptr_to_data; 6358 6359 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6360 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6361 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6362 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6363 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6364 } else { /* perform SVD */ 6365 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6366 6367 if (use_pod) { 6368 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6369 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6370 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6371 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6372 from that computed using LAPACKgesvd 6373 -> This is due to a different computation of eigenvectors in LAPACKheev 6374 -> The quality of the POD-computed basis will be the same */ 6375 ierr = PetscArrayzero(correlation_mat,temp_constraints*temp_constraints);CHKERRQ(ierr); 6376 /* Store upper triangular part of correlation matrix */ 6377 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6378 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6379 for (j=0;j<temp_constraints;j++) { 6380 for (k=0;k<j+1;k++) { 6381 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6382 } 6383 } 6384 /* compute eigenvalues and eigenvectors of correlation matrix */ 6385 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6386 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 6387 #if !defined(PETSC_USE_COMPLEX) 6388 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6389 #else 6390 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6391 #endif 6392 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6393 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6394 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6395 j = 0; 6396 while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++; 6397 total_counts = total_counts-j; 6398 valid_constraints = temp_constraints-j; 6399 /* scale and copy POD basis into used quadrature memory */ 6400 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6401 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6402 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 6403 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6404 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 6405 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6406 if (j<temp_constraints) { 6407 PetscInt ii; 6408 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6409 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6410 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6411 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6412 for (k=0;k<temp_constraints-j;k++) { 6413 for (ii=0;ii<size_of_constraint;ii++) { 6414 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6415 } 6416 } 6417 } 6418 } else { 6419 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6420 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6421 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6422 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6423 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6424 #if !defined(PETSC_USE_COMPLEX) 6425 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6426 #else 6427 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6428 #endif 6429 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6430 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6431 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6432 k = temp_constraints; 6433 if (k > size_of_constraint) k = size_of_constraint; 6434 j = 0; 6435 while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++; 6436 valid_constraints = k-j; 6437 total_counts = total_counts-temp_constraints+valid_constraints; 6438 #else 6439 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6440 #endif /* on missing GESVD */ 6441 } 6442 } 6443 } 6444 /* update pointers information */ 6445 if (valid_constraints) { 6446 constraints_n[total_counts_cc] = valid_constraints; 6447 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6448 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6449 /* set change_of_basis flag */ 6450 if (boolforchange) { 6451 PetscBTSet(change_basis,total_counts_cc); 6452 } 6453 total_counts_cc++; 6454 } 6455 } 6456 /* free workspace */ 6457 if (!skip_lapack) { 6458 ierr = PetscFree(work);CHKERRQ(ierr); 6459 #if defined(PETSC_USE_COMPLEX) 6460 ierr = PetscFree(rwork);CHKERRQ(ierr); 6461 #endif 6462 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 6463 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 6464 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 6465 } 6466 for (k=0;k<nnsp_size;k++) { 6467 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 6468 } 6469 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 6470 /* free index sets of faces, edges and vertices */ 6471 for (i=0;i<n_ISForFaces;i++) { 6472 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6473 } 6474 if (n_ISForFaces) { 6475 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6476 } 6477 for (i=0;i<n_ISForEdges;i++) { 6478 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6479 } 6480 if (n_ISForEdges) { 6481 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6482 } 6483 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6484 } else { 6485 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6486 6487 total_counts = 0; 6488 n_vertices = 0; 6489 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6490 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6491 } 6492 max_constraints = 0; 6493 total_counts_cc = 0; 6494 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6495 total_counts += pcbddc->adaptive_constraints_n[i]; 6496 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6497 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6498 } 6499 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6500 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6501 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6502 constraints_data = pcbddc->adaptive_constraints_data; 6503 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6504 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 6505 total_counts_cc = 0; 6506 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6507 if (pcbddc->adaptive_constraints_n[i]) { 6508 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6509 } 6510 } 6511 6512 max_size_of_constraint = 0; 6513 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6514 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 6515 /* Change of basis */ 6516 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 6517 if (pcbddc->use_change_of_basis) { 6518 for (i=0;i<sub_schurs->n_subs;i++) { 6519 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6520 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 6521 } 6522 } 6523 } 6524 } 6525 pcbddc->local_primal_size = total_counts; 6526 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6527 6528 /* map constraints_idxs in boundary numbering */ 6529 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6530 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i); 6531 6532 /* Create constraint matrix */ 6533 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6534 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6535 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6536 6537 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6538 /* determine if a QR strategy is needed for change of basis */ 6539 qr_needed = pcbddc->use_qr_single; 6540 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6541 total_primal_vertices=0; 6542 pcbddc->local_primal_size_cc = 0; 6543 for (i=0;i<total_counts_cc;i++) { 6544 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6545 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6546 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6547 pcbddc->local_primal_size_cc += 1; 6548 } else if (PetscBTLookup(change_basis,i)) { 6549 for (k=0;k<constraints_n[i];k++) { 6550 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6551 } 6552 pcbddc->local_primal_size_cc += constraints_n[i]; 6553 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6554 PetscBTSet(qr_needed_idx,i); 6555 qr_needed = PETSC_TRUE; 6556 } 6557 } else { 6558 pcbddc->local_primal_size_cc += 1; 6559 } 6560 } 6561 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6562 pcbddc->n_vertices = total_primal_vertices; 6563 /* permute indices in order to have a sorted set of vertices */ 6564 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6565 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6566 ierr = PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices);CHKERRQ(ierr); 6567 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6568 6569 /* nonzero structure of constraint matrix */ 6570 /* and get reference dof for local constraints */ 6571 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6572 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6573 6574 j = total_primal_vertices; 6575 total_counts = total_primal_vertices; 6576 cum = total_primal_vertices; 6577 for (i=n_vertices;i<total_counts_cc;i++) { 6578 if (!PetscBTLookup(change_basis,i)) { 6579 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6580 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6581 cum++; 6582 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6583 for (k=0;k<constraints_n[i];k++) { 6584 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6585 nnz[j+k] = size_of_constraint; 6586 } 6587 j += constraints_n[i]; 6588 } 6589 } 6590 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6591 ierr = MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6592 ierr = PetscFree(nnz);CHKERRQ(ierr); 6593 6594 /* set values in constraint matrix */ 6595 for (i=0;i<total_primal_vertices;i++) { 6596 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6597 } 6598 total_counts = total_primal_vertices; 6599 for (i=n_vertices;i<total_counts_cc;i++) { 6600 if (!PetscBTLookup(change_basis,i)) { 6601 PetscInt *cols; 6602 6603 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6604 cols = constraints_idxs+constraints_idxs_ptr[i]; 6605 for (k=0;k<constraints_n[i];k++) { 6606 PetscInt row = total_counts+k; 6607 PetscScalar *vals; 6608 6609 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6610 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6611 } 6612 total_counts += constraints_n[i]; 6613 } 6614 } 6615 /* assembling */ 6616 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6617 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6618 ierr = MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view");CHKERRQ(ierr); 6619 6620 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6621 if (pcbddc->use_change_of_basis) { 6622 /* dual and primal dofs on a single cc */ 6623 PetscInt dual_dofs,primal_dofs; 6624 /* working stuff for GEQRF */ 6625 PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t; 6626 PetscBLASInt lqr_work; 6627 /* working stuff for UNGQR */ 6628 PetscScalar *gqr_work = NULL,lgqr_work_t=0.0; 6629 PetscBLASInt lgqr_work; 6630 /* working stuff for TRTRS */ 6631 PetscScalar *trs_rhs = NULL; 6632 PetscBLASInt Blas_NRHS; 6633 /* pointers for values insertion into change of basis matrix */ 6634 PetscInt *start_rows,*start_cols; 6635 PetscScalar *start_vals; 6636 /* working stuff for values insertion */ 6637 PetscBT is_primal; 6638 PetscInt *aux_primal_numbering_B; 6639 /* matrix sizes */ 6640 PetscInt global_size,local_size; 6641 /* temporary change of basis */ 6642 Mat localChangeOfBasisMatrix; 6643 /* extra space for debugging */ 6644 PetscScalar *dbg_work = NULL; 6645 6646 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6647 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6648 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6649 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6650 /* nonzeros for local mat */ 6651 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6652 if (!pcbddc->benign_change || pcbddc->fake_change) { 6653 for (i=0;i<pcis->n;i++) nnz[i]=1; 6654 } else { 6655 const PetscInt *ii; 6656 PetscInt n; 6657 PetscBool flg_row; 6658 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6659 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6660 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6661 } 6662 for (i=n_vertices;i<total_counts_cc;i++) { 6663 if (PetscBTLookup(change_basis,i)) { 6664 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6665 if (PetscBTLookup(qr_needed_idx,i)) { 6666 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6667 } else { 6668 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6669 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6670 } 6671 } 6672 } 6673 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6674 ierr = MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6675 ierr = PetscFree(nnz);CHKERRQ(ierr); 6676 /* Set interior change in the matrix */ 6677 if (!pcbddc->benign_change || pcbddc->fake_change) { 6678 for (i=0;i<pcis->n;i++) { 6679 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6680 } 6681 } else { 6682 const PetscInt *ii,*jj; 6683 PetscScalar *aa; 6684 PetscInt n; 6685 PetscBool flg_row; 6686 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6687 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6688 for (i=0;i<n;i++) { 6689 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6690 } 6691 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6692 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6693 } 6694 6695 if (pcbddc->dbg_flag) { 6696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6697 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6698 } 6699 6700 6701 /* Now we loop on the constraints which need a change of basis */ 6702 /* 6703 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6704 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6705 6706 Basic blocks of change of basis matrix T computed by 6707 6708 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6709 6710 | 1 0 ... 0 s_1/S | 6711 | 0 1 ... 0 s_2/S | 6712 | ... | 6713 | 0 ... 1 s_{n-1}/S | 6714 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6715 6716 with S = \sum_{i=1}^n s_i^2 6717 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6718 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6719 6720 - QR decomposition of constraints otherwise 6721 */ 6722 if (qr_needed && max_size_of_constraint) { 6723 /* space to store Q */ 6724 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6725 /* array to store scaling factors for reflectors */ 6726 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6727 /* first we issue queries for optimal work */ 6728 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6729 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6730 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6731 lqr_work = -1; 6732 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6733 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6734 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6735 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6736 lgqr_work = -1; 6737 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6738 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6739 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6740 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6741 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6742 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6743 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6744 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6745 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6746 /* array to store rhs and solution of triangular solver */ 6747 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6748 /* allocating workspace for check */ 6749 if (pcbddc->dbg_flag) { 6750 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6751 } 6752 } 6753 /* array to store whether a node is primal or not */ 6754 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6755 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6756 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6757 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i); 6758 for (i=0;i<total_primal_vertices;i++) { 6759 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6760 } 6761 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6762 6763 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6764 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6765 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6766 if (PetscBTLookup(change_basis,total_counts)) { 6767 /* get constraint info */ 6768 primal_dofs = constraints_n[total_counts]; 6769 dual_dofs = size_of_constraint-primal_dofs; 6770 6771 if (pcbddc->dbg_flag) { 6772 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6773 } 6774 6775 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6776 6777 /* copy quadrature constraints for change of basis check */ 6778 if (pcbddc->dbg_flag) { 6779 ierr = PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6780 } 6781 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6782 ierr = PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6783 6784 /* compute QR decomposition of constraints */ 6785 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6786 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6787 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6788 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6789 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6790 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6791 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6792 6793 /* explictly compute R^-T */ 6794 ierr = PetscArrayzero(trs_rhs,primal_dofs*primal_dofs);CHKERRQ(ierr); 6795 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6796 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6797 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6798 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6799 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6800 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6801 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6802 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6803 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6804 6805 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 6806 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6807 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6808 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6809 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6810 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6811 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6812 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6813 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6814 6815 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6816 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6817 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6818 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6819 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6820 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6821 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6822 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6823 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6824 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6825 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6826 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6827 ierr = PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6828 6829 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6830 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6831 /* insert cols for primal dofs */ 6832 for (j=0;j<primal_dofs;j++) { 6833 start_vals = &qr_basis[j*size_of_constraint]; 6834 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6835 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6836 } 6837 /* insert cols for dual dofs */ 6838 for (j=0,k=0;j<dual_dofs;k++) { 6839 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6840 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6841 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6842 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6843 j++; 6844 } 6845 } 6846 6847 /* check change of basis */ 6848 if (pcbddc->dbg_flag) { 6849 PetscInt ii,jj; 6850 PetscBool valid_qr=PETSC_TRUE; 6851 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6852 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6853 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6854 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6855 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6856 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6857 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6858 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6859 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6860 for (jj=0;jj<size_of_constraint;jj++) { 6861 for (ii=0;ii<primal_dofs;ii++) { 6862 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6863 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6864 } 6865 } 6866 if (!valid_qr) { 6867 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6868 for (jj=0;jj<size_of_constraint;jj++) { 6869 for (ii=0;ii<primal_dofs;ii++) { 6870 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6871 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6872 } 6873 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6874 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6875 } 6876 } 6877 } 6878 } else { 6879 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6880 } 6881 } 6882 } else { /* simple transformation block */ 6883 PetscInt row,col; 6884 PetscScalar val,norm; 6885 6886 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6887 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6888 for (j=0;j<size_of_constraint;j++) { 6889 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6890 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6891 if (!PetscBTLookup(is_primal,row_B)) { 6892 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6893 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6894 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6895 } else { 6896 for (k=0;k<size_of_constraint;k++) { 6897 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6898 if (row != col) { 6899 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6900 } else { 6901 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6902 } 6903 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6904 } 6905 } 6906 } 6907 if (pcbddc->dbg_flag) { 6908 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6909 } 6910 } 6911 } else { 6912 if (pcbddc->dbg_flag) { 6913 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6914 } 6915 } 6916 } 6917 6918 /* free workspace */ 6919 if (qr_needed) { 6920 if (pcbddc->dbg_flag) { 6921 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6922 } 6923 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6924 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6925 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6926 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6927 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6928 } 6929 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6930 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6931 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6932 6933 /* assembling of global change of variable */ 6934 if (!pcbddc->fake_change) { 6935 Mat tmat; 6936 PetscInt bs; 6937 6938 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6939 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6940 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6941 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6942 ierr = MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6943 ierr = MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6944 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6945 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6946 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6947 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6948 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6949 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6950 ierr = MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6951 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6952 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6953 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6954 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6955 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6956 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6957 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6958 6959 /* check */ 6960 if (pcbddc->dbg_flag) { 6961 PetscReal error; 6962 Vec x,x_change; 6963 6964 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6965 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6966 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6967 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6968 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6969 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6970 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6971 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6972 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6973 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6974 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6975 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6976 if (error > PETSC_SMALL) { 6977 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 6978 } 6979 ierr = VecDestroy(&x);CHKERRQ(ierr); 6980 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6981 } 6982 /* adapt sub_schurs computed (if any) */ 6983 if (pcbddc->use_deluxe_scaling) { 6984 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6985 6986 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6987 if (sub_schurs && sub_schurs->S_Ej_all) { 6988 Mat S_new,tmat; 6989 IS is_all_N,is_V_Sall = NULL; 6990 6991 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6992 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6993 if (pcbddc->deluxe_zerorows) { 6994 ISLocalToGlobalMapping NtoSall; 6995 IS is_V; 6996 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6997 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6998 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6999 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 7000 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 7001 } 7002 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 7003 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 7004 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 7005 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 7006 if (pcbddc->deluxe_zerorows) { 7007 const PetscScalar *array; 7008 const PetscInt *idxs_V,*idxs_all; 7009 PetscInt i,n_V; 7010 7011 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 7012 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 7013 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 7014 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 7015 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 7016 for (i=0;i<n_V;i++) { 7017 PetscScalar val; 7018 PetscInt idx; 7019 7020 idx = idxs_V[i]; 7021 val = array[idxs_all[idxs_V[i]]]; 7022 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 7023 } 7024 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7025 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7026 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 7027 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 7028 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 7029 } 7030 sub_schurs->S_Ej_all = S_new; 7031 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 7032 if (sub_schurs->sum_S_Ej_all) { 7033 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 7034 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 7035 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 7036 if (pcbddc->deluxe_zerorows) { 7037 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 7038 } 7039 sub_schurs->sum_S_Ej_all = S_new; 7040 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 7041 } 7042 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 7043 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 7044 } 7045 /* destroy any change of basis context in sub_schurs */ 7046 if (sub_schurs && sub_schurs->change) { 7047 PetscInt i; 7048 7049 for (i=0;i<sub_schurs->n_subs;i++) { 7050 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 7051 } 7052 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 7053 } 7054 } 7055 if (pcbddc->switch_static) { /* need to save the local change */ 7056 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7057 } else { 7058 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 7059 } 7060 /* determine if any process has changed the pressures locally */ 7061 pcbddc->change_interior = pcbddc->benign_have_null; 7062 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7063 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 7064 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7065 pcbddc->use_qr_single = qr_needed; 7066 } 7067 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7068 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7069 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 7070 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7071 } else { 7072 Mat benign_global = NULL; 7073 if (pcbddc->benign_have_null) { 7074 Mat M; 7075 7076 pcbddc->change_interior = PETSC_TRUE; 7077 ierr = VecCopy(matis->counter,pcis->vec1_N);CHKERRQ(ierr); 7078 ierr = VecReciprocal(pcis->vec1_N);CHKERRQ(ierr); 7079 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);CHKERRQ(ierr); 7080 if (pcbddc->benign_change) { 7081 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 7082 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 7083 } else { 7084 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);CHKERRQ(ierr); 7085 ierr = MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);CHKERRQ(ierr); 7086 } 7087 ierr = MatISSetLocalMat(benign_global,M);CHKERRQ(ierr); 7088 ierr = MatDestroy(&M);CHKERRQ(ierr); 7089 ierr = MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7090 ierr = MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7091 } 7092 if (pcbddc->user_ChangeOfBasisMatrix) { 7093 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 7094 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 7095 } else if (pcbddc->benign_have_null) { 7096 pcbddc->ChangeOfBasisMatrix = benign_global; 7097 } 7098 } 7099 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7100 IS is_global; 7101 const PetscInt *gidxs; 7102 7103 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 7104 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 7105 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 7106 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 7107 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 7108 } 7109 } 7110 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 7111 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 7112 } 7113 7114 if (!pcbddc->fake_change) { 7115 /* add pressure dofs to set of primal nodes for numbering purposes */ 7116 for (i=0;i<pcbddc->benign_n;i++) { 7117 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7118 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7119 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7120 pcbddc->local_primal_size_cc++; 7121 pcbddc->local_primal_size++; 7122 } 7123 7124 /* check if a new primal space has been introduced (also take into account benign trick) */ 7125 pcbddc->new_primal_space_local = PETSC_TRUE; 7126 if (olocal_primal_size == pcbddc->local_primal_size) { 7127 ierr = PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 7128 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7129 if (!pcbddc->new_primal_space_local) { 7130 ierr = PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 7131 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7132 } 7133 } 7134 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7135 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7136 } 7137 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 7138 7139 /* flush dbg viewer */ 7140 if (pcbddc->dbg_flag) { 7141 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7142 } 7143 7144 /* free workspace */ 7145 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 7146 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 7147 if (!pcbddc->adaptive_selection) { 7148 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 7149 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 7150 } else { 7151 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 7152 pcbddc->adaptive_constraints_idxs_ptr, 7153 pcbddc->adaptive_constraints_data_ptr, 7154 pcbddc->adaptive_constraints_idxs, 7155 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 7156 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 7157 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 7158 } 7159 PetscFunctionReturn(0); 7160 } 7161 7162 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7163 { 7164 ISLocalToGlobalMapping map; 7165 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7166 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 7167 PetscInt i,N; 7168 PetscBool rcsr = PETSC_FALSE; 7169 PetscErrorCode ierr; 7170 7171 PetscFunctionBegin; 7172 if (pcbddc->recompute_topography) { 7173 pcbddc->graphanalyzed = PETSC_FALSE; 7174 /* Reset previously computed graph */ 7175 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 7176 /* Init local Graph struct */ 7177 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 7178 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 7179 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 7180 7181 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 7182 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 7183 } 7184 /* Check validity of the csr graph passed in by the user */ 7185 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 7186 7187 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7188 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7189 PetscInt *xadj,*adjncy; 7190 PetscInt nvtxs; 7191 PetscBool flg_row=PETSC_FALSE; 7192 7193 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 7194 if (flg_row) { 7195 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 7196 pcbddc->computed_rowadj = PETSC_TRUE; 7197 } 7198 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 7199 rcsr = PETSC_TRUE; 7200 } 7201 if (pcbddc->dbg_flag) { 7202 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7203 } 7204 7205 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7206 PetscReal *lcoords; 7207 PetscInt n; 7208 MPI_Datatype dimrealtype; 7209 7210 /* TODO: support for blocked */ 7211 if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 7212 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 7213 ierr = PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);CHKERRQ(ierr); 7214 ierr = MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);CHKERRQ(ierr); 7215 ierr = MPI_Type_commit(&dimrealtype);CHKERRQ(ierr); 7216 ierr = PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 7217 ierr = PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 7218 ierr = MPI_Type_free(&dimrealtype);CHKERRQ(ierr); 7219 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 7220 7221 pcbddc->mat_graph->coords = lcoords; 7222 pcbddc->mat_graph->cloc = PETSC_TRUE; 7223 pcbddc->mat_graph->cnloc = n; 7224 } 7225 if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 7226 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected); 7227 7228 /* Setup of Graph */ 7229 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 7230 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 7231 7232 /* attach info on disconnected subdomains if present */ 7233 if (pcbddc->n_local_subs) { 7234 PetscInt *local_subs,n,totn; 7235 7236 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 7237 ierr = PetscMalloc1(n,&local_subs);CHKERRQ(ierr); 7238 for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs; 7239 for (i=0;i<pcbddc->n_local_subs;i++) { 7240 const PetscInt *idxs; 7241 PetscInt nl,j; 7242 7243 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 7244 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 7245 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 7246 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 7247 } 7248 for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]); 7249 pcbddc->mat_graph->n_local_subs = totn + 1; 7250 pcbddc->mat_graph->local_subs = local_subs; 7251 } 7252 } 7253 7254 if (!pcbddc->graphanalyzed) { 7255 /* Graph's connected components analysis */ 7256 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 7257 pcbddc->graphanalyzed = PETSC_TRUE; 7258 pcbddc->corner_selected = pcbddc->corner_selection; 7259 } 7260 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7261 PetscFunctionReturn(0); 7262 } 7263 7264 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7265 { 7266 PetscInt i,j,n; 7267 PetscScalar *alphas; 7268 PetscReal norm,*onorms; 7269 PetscErrorCode ierr; 7270 7271 PetscFunctionBegin; 7272 n = *nio; 7273 if (!n) PetscFunctionReturn(0); 7274 ierr = PetscMalloc2(n,&alphas,n,&onorms);CHKERRQ(ierr); 7275 ierr = VecNormalize(vecs[0],&norm);CHKERRQ(ierr); 7276 if (norm < PETSC_SMALL) { 7277 onorms[0] = 0.0; 7278 ierr = VecSet(vecs[0],0.0);CHKERRQ(ierr); 7279 } else { 7280 onorms[0] = norm; 7281 } 7282 7283 for (i=1;i<n;i++) { 7284 ierr = VecMDot(vecs[i],i,vecs,alphas);CHKERRQ(ierr); 7285 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 7286 ierr = VecMAXPY(vecs[i],i,alphas,vecs);CHKERRQ(ierr); 7287 ierr = VecNormalize(vecs[i],&norm);CHKERRQ(ierr); 7288 if (norm < PETSC_SMALL) { 7289 onorms[i] = 0.0; 7290 ierr = VecSet(vecs[i],0.0);CHKERRQ(ierr); 7291 } else { 7292 onorms[i] = norm; 7293 } 7294 } 7295 /* push nonzero vectors at the beginning */ 7296 for (i=0;i<n;i++) { 7297 if (onorms[i] == 0.0) { 7298 for (j=i+1;j<n;j++) { 7299 if (onorms[j] != 0.0) { 7300 ierr = VecCopy(vecs[j],vecs[i]);CHKERRQ(ierr); 7301 onorms[j] = 0.0; 7302 } 7303 } 7304 } 7305 } 7306 for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7307 ierr = PetscFree2(alphas,onorms);CHKERRQ(ierr); 7308 PetscFunctionReturn(0); 7309 } 7310 7311 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7312 { 7313 Mat A; 7314 PetscInt n_neighs,*neighs,*n_shared,**shared; 7315 PetscMPIInt size,rank,color; 7316 PetscInt *xadj,*adjncy; 7317 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7318 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7319 PetscInt void_procs,*procs_candidates = NULL; 7320 PetscInt xadj_count,*count; 7321 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7322 PetscSubcomm psubcomm; 7323 MPI_Comm subcomm; 7324 PetscErrorCode ierr; 7325 7326 PetscFunctionBegin; 7327 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7328 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7329 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7330 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7331 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7332 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains); 7333 7334 if (have_void) *have_void = PETSC_FALSE; 7335 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 7336 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 7337 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 7338 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 7339 im_active = !!n; 7340 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7341 void_procs = size - active_procs; 7342 /* get ranks of of non-active processes in mat communicator */ 7343 if (void_procs) { 7344 PetscInt ncand; 7345 7346 if (have_void) *have_void = PETSC_TRUE; 7347 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 7348 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7349 for (i=0,ncand=0;i<size;i++) { 7350 if (!procs_candidates[i]) { 7351 procs_candidates[ncand++] = i; 7352 } 7353 } 7354 /* force n_subdomains to be not greater that the number of non-active processes */ 7355 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7356 } 7357 7358 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7359 number of subdomains requested 1 -> send to master or first candidate in voids */ 7360 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 7361 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7362 PetscInt issize,isidx,dest; 7363 if (*n_subdomains == 1) dest = 0; 7364 else dest = rank; 7365 if (im_active) { 7366 issize = 1; 7367 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7368 isidx = procs_candidates[dest]; 7369 } else { 7370 isidx = dest; 7371 } 7372 } else { 7373 issize = 0; 7374 isidx = -1; 7375 } 7376 if (*n_subdomains != 1) *n_subdomains = active_procs; 7377 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 7378 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7379 PetscFunctionReturn(0); 7380 } 7381 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 7382 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 7383 threshold = PetscMax(threshold,2); 7384 7385 /* Get info on mapping */ 7386 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7387 7388 /* build local CSR graph of subdomains' connectivity */ 7389 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 7390 xadj[0] = 0; 7391 xadj[1] = PetscMax(n_neighs-1,0); 7392 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 7393 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 7394 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 7395 for (i=1;i<n_neighs;i++) 7396 for (j=0;j<n_shared[i];j++) 7397 count[shared[i][j]] += 1; 7398 7399 xadj_count = 0; 7400 for (i=1;i<n_neighs;i++) { 7401 for (j=0;j<n_shared[i];j++) { 7402 if (count[shared[i][j]] < threshold) { 7403 adjncy[xadj_count] = neighs[i]; 7404 adjncy_wgt[xadj_count] = n_shared[i]; 7405 xadj_count++; 7406 break; 7407 } 7408 } 7409 } 7410 xadj[1] = xadj_count; 7411 ierr = PetscFree(count);CHKERRQ(ierr); 7412 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7413 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7414 7415 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 7416 7417 /* Restrict work on active processes only */ 7418 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 7419 if (void_procs) { 7420 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 7421 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 7422 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 7423 subcomm = PetscSubcommChild(psubcomm); 7424 } else { 7425 psubcomm = NULL; 7426 subcomm = PetscObjectComm((PetscObject)mat); 7427 } 7428 7429 v_wgt = NULL; 7430 if (!color) { 7431 ierr = PetscFree(xadj);CHKERRQ(ierr); 7432 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7433 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7434 } else { 7435 Mat subdomain_adj; 7436 IS new_ranks,new_ranks_contig; 7437 MatPartitioning partitioner; 7438 PetscInt rstart=0,rend=0; 7439 PetscInt *is_indices,*oldranks; 7440 PetscMPIInt size; 7441 PetscBool aggregate; 7442 7443 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 7444 if (void_procs) { 7445 PetscInt prank = rank; 7446 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 7447 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 7448 for (i=0;i<xadj[1];i++) { 7449 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 7450 } 7451 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7452 } else { 7453 oldranks = NULL; 7454 } 7455 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7456 if (aggregate) { /* TODO: all this part could be made more efficient */ 7457 PetscInt lrows,row,ncols,*cols; 7458 PetscMPIInt nrank; 7459 PetscScalar *vals; 7460 7461 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 7462 lrows = 0; 7463 if (nrank<redprocs) { 7464 lrows = size/redprocs; 7465 if (nrank<size%redprocs) lrows++; 7466 } 7467 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 7468 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 7469 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7470 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7471 row = nrank; 7472 ncols = xadj[1]-xadj[0]; 7473 cols = adjncy; 7474 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 7475 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7476 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 7477 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7478 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7479 ierr = PetscFree(xadj);CHKERRQ(ierr); 7480 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7481 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7482 ierr = PetscFree(vals);CHKERRQ(ierr); 7483 if (use_vwgt) { 7484 Vec v; 7485 const PetscScalar *array; 7486 PetscInt nl; 7487 7488 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 7489 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 7490 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 7491 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 7492 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 7493 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 7494 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 7495 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7496 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 7497 ierr = VecDestroy(&v);CHKERRQ(ierr); 7498 } 7499 } else { 7500 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 7501 if (use_vwgt) { 7502 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 7503 v_wgt[0] = n; 7504 } 7505 } 7506 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 7507 7508 /* Partition */ 7509 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 7510 #if defined(PETSC_HAVE_PTSCOTCH) 7511 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH);CHKERRQ(ierr); 7512 #elif defined(PETSC_HAVE_PARMETIS) 7513 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS);CHKERRQ(ierr); 7514 #else 7515 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE);CHKERRQ(ierr); 7516 #endif 7517 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 7518 if (v_wgt) { 7519 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 7520 } 7521 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7522 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 7523 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 7524 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 7525 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 7526 7527 /* renumber new_ranks to avoid "holes" in new set of processors */ 7528 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 7529 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 7530 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7531 if (!aggregate) { 7532 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7533 if (PetscUnlikelyDebug(!oldranks)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7534 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7535 } else if (oldranks) { 7536 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7537 } else { 7538 ranks_send_to_idx[0] = is_indices[0]; 7539 } 7540 } else { 7541 PetscInt idx = 0; 7542 PetscMPIInt tag; 7543 MPI_Request *reqs; 7544 7545 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 7546 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 7547 for (i=rstart;i<rend;i++) { 7548 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 7549 } 7550 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 7551 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7552 ierr = PetscFree(reqs);CHKERRQ(ierr); 7553 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7554 if (PetscUnlikelyDebug(!oldranks)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7555 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7556 } else if (oldranks) { 7557 ranks_send_to_idx[0] = oldranks[idx]; 7558 } else { 7559 ranks_send_to_idx[0] = idx; 7560 } 7561 } 7562 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7563 /* clean up */ 7564 ierr = PetscFree(oldranks);CHKERRQ(ierr); 7565 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 7566 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 7567 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7568 } 7569 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7570 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7571 7572 /* assemble parallel IS for sends */ 7573 i = 1; 7574 if (!color) i=0; 7575 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7576 PetscFunctionReturn(0); 7577 } 7578 7579 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7580 7581 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7582 { 7583 Mat local_mat; 7584 IS is_sends_internal; 7585 PetscInt rows,cols,new_local_rows; 7586 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7587 PetscBool ismatis,isdense,newisdense,destroy_mat; 7588 ISLocalToGlobalMapping l2gmap; 7589 PetscInt* l2gmap_indices; 7590 const PetscInt* is_indices; 7591 MatType new_local_type; 7592 /* buffers */ 7593 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7594 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7595 PetscInt *recv_buffer_idxs_local; 7596 PetscScalar *ptr_vals,*recv_buffer_vals; 7597 const PetscScalar *send_buffer_vals; 7598 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7599 /* MPI */ 7600 MPI_Comm comm,comm_n; 7601 PetscSubcomm subcomm; 7602 PetscMPIInt n_sends,n_recvs,size; 7603 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7604 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7605 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7606 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7607 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7608 PetscErrorCode ierr; 7609 7610 PetscFunctionBegin; 7611 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7612 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7613 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7614 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7615 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7616 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7617 PetscValidLogicalCollectiveBool(mat,reuse,6); 7618 PetscValidLogicalCollectiveInt(mat,nis,8); 7619 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7620 if (nvecs) { 7621 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7622 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7623 } 7624 /* further checks */ 7625 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7626 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7627 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7628 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7629 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7630 if (reuse && *mat_n) { 7631 PetscInt mrows,mcols,mnrows,mncols; 7632 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7633 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7634 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7635 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7636 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7637 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7638 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7639 } 7640 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7641 PetscValidLogicalCollectiveInt(mat,bs,0); 7642 7643 /* prepare IS for sending if not provided */ 7644 if (!is_sends) { 7645 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7646 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7647 } else { 7648 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7649 is_sends_internal = is_sends; 7650 } 7651 7652 /* get comm */ 7653 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7654 7655 /* compute number of sends */ 7656 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7657 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7658 7659 /* compute number of receives */ 7660 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 7661 ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr); 7662 ierr = PetscArrayzero(iflags,size);CHKERRQ(ierr); 7663 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7664 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7665 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7666 ierr = PetscFree(iflags);CHKERRQ(ierr); 7667 7668 /* restrict comm if requested */ 7669 subcomm = NULL; 7670 destroy_mat = PETSC_FALSE; 7671 if (restrict_comm) { 7672 PetscMPIInt color,subcommsize; 7673 7674 color = 0; 7675 if (restrict_full) { 7676 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7677 } else { 7678 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7679 } 7680 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7681 subcommsize = size - subcommsize; 7682 /* check if reuse has been requested */ 7683 if (reuse) { 7684 if (*mat_n) { 7685 PetscMPIInt subcommsize2; 7686 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7687 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7688 comm_n = PetscObjectComm((PetscObject)*mat_n); 7689 } else { 7690 comm_n = PETSC_COMM_SELF; 7691 } 7692 } else { /* MAT_INITIAL_MATRIX */ 7693 PetscMPIInt rank; 7694 7695 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7696 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7697 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7698 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7699 comm_n = PetscSubcommChild(subcomm); 7700 } 7701 /* flag to destroy *mat_n if not significative */ 7702 if (color) destroy_mat = PETSC_TRUE; 7703 } else { 7704 comm_n = comm; 7705 } 7706 7707 /* prepare send/receive buffers */ 7708 ierr = PetscMalloc1(size,&ilengths_idxs);CHKERRQ(ierr); 7709 ierr = PetscArrayzero(ilengths_idxs,size);CHKERRQ(ierr); 7710 ierr = PetscMalloc1(size,&ilengths_vals);CHKERRQ(ierr); 7711 ierr = PetscArrayzero(ilengths_vals,size);CHKERRQ(ierr); 7712 if (nis) { 7713 ierr = PetscCalloc1(size,&ilengths_idxs_is);CHKERRQ(ierr); 7714 } 7715 7716 /* Get data from local matrices */ 7717 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7718 /* TODO: See below some guidelines on how to prepare the local buffers */ 7719 /* 7720 send_buffer_vals should contain the raw values of the local matrix 7721 send_buffer_idxs should contain: 7722 - MatType_PRIVATE type 7723 - PetscInt size_of_l2gmap 7724 - PetscInt global_row_indices[size_of_l2gmap] 7725 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7726 */ 7727 else { 7728 ierr = MatDenseGetArrayRead(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7729 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7730 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7731 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7732 send_buffer_idxs[1] = i; 7733 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7734 ierr = PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i);CHKERRQ(ierr); 7735 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7736 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7737 for (i=0;i<n_sends;i++) { 7738 ilengths_vals[is_indices[i]] = len*len; 7739 ilengths_idxs[is_indices[i]] = len+2; 7740 } 7741 } 7742 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7743 /* additional is (if any) */ 7744 if (nis) { 7745 PetscMPIInt psum; 7746 PetscInt j; 7747 for (j=0,psum=0;j<nis;j++) { 7748 PetscInt plen; 7749 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7750 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7751 psum += len+1; /* indices + lenght */ 7752 } 7753 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7754 for (j=0,psum=0;j<nis;j++) { 7755 PetscInt plen; 7756 const PetscInt *is_array_idxs; 7757 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7758 send_buffer_idxs_is[psum] = plen; 7759 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7760 ierr = PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen);CHKERRQ(ierr); 7761 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7762 psum += plen+1; /* indices + lenght */ 7763 } 7764 for (i=0;i<n_sends;i++) { 7765 ilengths_idxs_is[is_indices[i]] = psum; 7766 } 7767 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7768 } 7769 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7770 7771 buf_size_idxs = 0; 7772 buf_size_vals = 0; 7773 buf_size_idxs_is = 0; 7774 buf_size_vecs = 0; 7775 for (i=0;i<n_recvs;i++) { 7776 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7777 buf_size_vals += (PetscInt)olengths_vals[i]; 7778 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7779 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7780 } 7781 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7782 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7783 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7784 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7785 7786 /* get new tags for clean communications */ 7787 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7788 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7789 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7790 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7791 7792 /* allocate for requests */ 7793 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7794 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7795 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7796 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7797 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7798 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7799 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7800 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7801 7802 /* communications */ 7803 ptr_idxs = recv_buffer_idxs; 7804 ptr_vals = recv_buffer_vals; 7805 ptr_idxs_is = recv_buffer_idxs_is; 7806 ptr_vecs = recv_buffer_vecs; 7807 for (i=0;i<n_recvs;i++) { 7808 source_dest = onodes[i]; 7809 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7810 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7811 ptr_idxs += olengths_idxs[i]; 7812 ptr_vals += olengths_vals[i]; 7813 if (nis) { 7814 source_dest = onodes_is[i]; 7815 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7816 ptr_idxs_is += olengths_idxs_is[i]; 7817 } 7818 if (nvecs) { 7819 source_dest = onodes[i]; 7820 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7821 ptr_vecs += olengths_idxs[i]-2; 7822 } 7823 } 7824 for (i=0;i<n_sends;i++) { 7825 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7826 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7827 ierr = MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7828 if (nis) { 7829 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7830 } 7831 if (nvecs) { 7832 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7833 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7834 } 7835 } 7836 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7837 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7838 7839 /* assemble new l2g map */ 7840 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7841 ptr_idxs = recv_buffer_idxs; 7842 new_local_rows = 0; 7843 for (i=0;i<n_recvs;i++) { 7844 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7845 ptr_idxs += olengths_idxs[i]; 7846 } 7847 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7848 ptr_idxs = recv_buffer_idxs; 7849 new_local_rows = 0; 7850 for (i=0;i<n_recvs;i++) { 7851 ierr = PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1));CHKERRQ(ierr); 7852 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7853 ptr_idxs += olengths_idxs[i]; 7854 } 7855 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7856 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7857 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7858 7859 /* infer new local matrix type from received local matrices type */ 7860 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7861 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7862 if (n_recvs) { 7863 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7864 ptr_idxs = recv_buffer_idxs; 7865 for (i=0;i<n_recvs;i++) { 7866 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7867 new_local_type_private = MATAIJ_PRIVATE; 7868 break; 7869 } 7870 ptr_idxs += olengths_idxs[i]; 7871 } 7872 switch (new_local_type_private) { 7873 case MATDENSE_PRIVATE: 7874 new_local_type = MATSEQAIJ; 7875 bs = 1; 7876 break; 7877 case MATAIJ_PRIVATE: 7878 new_local_type = MATSEQAIJ; 7879 bs = 1; 7880 break; 7881 case MATBAIJ_PRIVATE: 7882 new_local_type = MATSEQBAIJ; 7883 break; 7884 case MATSBAIJ_PRIVATE: 7885 new_local_type = MATSEQSBAIJ; 7886 break; 7887 default: 7888 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7889 } 7890 } else { /* by default, new_local_type is seqaij */ 7891 new_local_type = MATSEQAIJ; 7892 bs = 1; 7893 } 7894 7895 /* create MATIS object if needed */ 7896 if (!reuse) { 7897 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7898 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7899 } else { 7900 /* it also destroys the local matrices */ 7901 if (*mat_n) { 7902 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7903 } else { /* this is a fake object */ 7904 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7905 } 7906 } 7907 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7908 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7909 7910 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7911 7912 /* Global to local map of received indices */ 7913 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7914 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7915 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7916 7917 /* restore attributes -> type of incoming data and its size */ 7918 buf_size_idxs = 0; 7919 for (i=0;i<n_recvs;i++) { 7920 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7921 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7922 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7923 } 7924 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7925 7926 /* set preallocation */ 7927 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7928 if (!newisdense) { 7929 PetscInt *new_local_nnz=NULL; 7930 7931 ptr_idxs = recv_buffer_idxs_local; 7932 if (n_recvs) { 7933 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7934 } 7935 for (i=0;i<n_recvs;i++) { 7936 PetscInt j; 7937 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7938 for (j=0;j<*(ptr_idxs+1);j++) { 7939 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7940 } 7941 } else { 7942 /* TODO */ 7943 } 7944 ptr_idxs += olengths_idxs[i]; 7945 } 7946 if (new_local_nnz) { 7947 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7948 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7949 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7950 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7951 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7952 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7953 } else { 7954 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7955 } 7956 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7957 } else { 7958 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7959 } 7960 7961 /* set values */ 7962 ptr_vals = recv_buffer_vals; 7963 ptr_idxs = recv_buffer_idxs_local; 7964 for (i=0;i<n_recvs;i++) { 7965 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7966 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7967 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7968 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7969 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7970 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7971 } else { 7972 /* TODO */ 7973 } 7974 ptr_idxs += olengths_idxs[i]; 7975 ptr_vals += olengths_vals[i]; 7976 } 7977 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7978 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7979 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7980 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7981 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7982 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7983 7984 #if 0 7985 if (!restrict_comm) { /* check */ 7986 Vec lvec,rvec; 7987 PetscReal infty_error; 7988 7989 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7990 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7991 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7992 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7993 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7994 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7995 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7996 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7997 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7998 } 7999 #endif 8000 8001 /* assemble new additional is (if any) */ 8002 if (nis) { 8003 PetscInt **temp_idxs,*count_is,j,psum; 8004 8005 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8006 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 8007 ptr_idxs = recv_buffer_idxs_is; 8008 psum = 0; 8009 for (i=0;i<n_recvs;i++) { 8010 for (j=0;j<nis;j++) { 8011 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8012 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 8013 psum += plen; 8014 ptr_idxs += plen+1; /* shift pointer to received data */ 8015 } 8016 } 8017 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 8018 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 8019 for (i=1;i<nis;i++) { 8020 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 8021 } 8022 ierr = PetscArrayzero(count_is,nis);CHKERRQ(ierr); 8023 ptr_idxs = recv_buffer_idxs_is; 8024 for (i=0;i<n_recvs;i++) { 8025 for (j=0;j<nis;j++) { 8026 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8027 ierr = PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen);CHKERRQ(ierr); 8028 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 8029 ptr_idxs += plen+1; /* shift pointer to received data */ 8030 } 8031 } 8032 for (i=0;i<nis;i++) { 8033 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8034 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr); 8035 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 8036 } 8037 ierr = PetscFree(count_is);CHKERRQ(ierr); 8038 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 8039 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 8040 } 8041 /* free workspace */ 8042 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 8043 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8044 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 8045 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8046 if (isdense) { 8047 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 8048 ierr = MatDenseRestoreArrayRead(local_mat,&send_buffer_vals);CHKERRQ(ierr); 8049 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 8050 } else { 8051 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 8052 } 8053 if (nis) { 8054 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8055 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 8056 } 8057 8058 if (nvecs) { 8059 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8060 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8061 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8062 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 8063 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 8064 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 8065 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 8066 /* set values */ 8067 ptr_vals = recv_buffer_vecs; 8068 ptr_idxs = recv_buffer_idxs_local; 8069 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8070 for (i=0;i<n_recvs;i++) { 8071 PetscInt j; 8072 for (j=0;j<*(ptr_idxs+1);j++) { 8073 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 8074 } 8075 ptr_idxs += olengths_idxs[i]; 8076 ptr_vals += olengths_idxs[i]-2; 8077 } 8078 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8079 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 8080 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 8081 } 8082 8083 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 8084 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 8085 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 8086 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 8087 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 8088 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 8089 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 8090 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 8091 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 8092 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 8093 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 8094 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 8095 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 8096 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 8097 ierr = PetscFree(onodes);CHKERRQ(ierr); 8098 if (nis) { 8099 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 8100 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 8101 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 8102 } 8103 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 8104 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 8105 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 8106 for (i=0;i<nis;i++) { 8107 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8108 } 8109 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8110 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 8111 } 8112 *mat_n = NULL; 8113 } 8114 PetscFunctionReturn(0); 8115 } 8116 8117 /* temporary hack into ksp private data structure */ 8118 #include <petsc/private/kspimpl.h> 8119 8120 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 8121 { 8122 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 8123 PC_IS *pcis = (PC_IS*)pc->data; 8124 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 8125 Mat coarsedivudotp = NULL; 8126 Mat coarseG,t_coarse_mat_is; 8127 MatNullSpace CoarseNullSpace = NULL; 8128 ISLocalToGlobalMapping coarse_islg; 8129 IS coarse_is,*isarray,corners; 8130 PetscInt i,im_active=-1,active_procs=-1; 8131 PetscInt nis,nisdofs,nisneu,nisvert; 8132 PetscInt coarse_eqs_per_proc; 8133 PC pc_temp; 8134 PCType coarse_pc_type; 8135 KSPType coarse_ksp_type; 8136 PetscBool multilevel_requested,multilevel_allowed; 8137 PetscBool coarse_reuse; 8138 PetscInt ncoarse,nedcfield; 8139 PetscBool compute_vecs = PETSC_FALSE; 8140 PetscScalar *array; 8141 MatReuse coarse_mat_reuse; 8142 PetscBool restr, full_restr, have_void; 8143 PetscMPIInt size; 8144 PetscErrorCode ierr; 8145 8146 PetscFunctionBegin; 8147 ierr = PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8148 /* Assign global numbering to coarse dofs */ 8149 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8150 PetscInt ocoarse_size; 8151 compute_vecs = PETSC_TRUE; 8152 8153 pcbddc->new_primal_space = PETSC_TRUE; 8154 ocoarse_size = pcbddc->coarse_size; 8155 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 8156 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 8157 /* see if we can avoid some work */ 8158 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8159 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8160 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8161 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 8162 coarse_reuse = PETSC_FALSE; 8163 } else { /* we can safely reuse already computed coarse matrix */ 8164 coarse_reuse = PETSC_TRUE; 8165 } 8166 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8167 coarse_reuse = PETSC_FALSE; 8168 } 8169 /* reset any subassembling information */ 8170 if (!coarse_reuse || pcbddc->recompute_topography) { 8171 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 8172 } 8173 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8174 coarse_reuse = PETSC_TRUE; 8175 } 8176 if (coarse_reuse && pcbddc->coarse_ksp) { 8177 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 8178 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 8179 coarse_mat_reuse = MAT_REUSE_MATRIX; 8180 } else { 8181 coarse_mat = NULL; 8182 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8183 } 8184 8185 /* creates temporary l2gmap and IS for coarse indexes */ 8186 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 8187 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 8188 8189 /* creates temporary MATIS object for coarse matrix */ 8190 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 8191 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 8192 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 8193 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8194 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8195 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 8196 8197 /* count "active" (i.e. with positive local size) and "void" processes */ 8198 im_active = !!(pcis->n); 8199 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8200 8201 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8202 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8203 /* full_restr : just use the receivers from the subassembling pattern */ 8204 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 8205 coarse_mat_is = NULL; 8206 multilevel_allowed = PETSC_FALSE; 8207 multilevel_requested = PETSC_FALSE; 8208 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 8209 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 8210 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8211 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8212 if (multilevel_requested) { 8213 ncoarse = active_procs/pcbddc->coarsening_ratio; 8214 restr = PETSC_FALSE; 8215 full_restr = PETSC_FALSE; 8216 } else { 8217 ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc); 8218 restr = PETSC_TRUE; 8219 full_restr = PETSC_TRUE; 8220 } 8221 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8222 ncoarse = PetscMax(1,ncoarse); 8223 if (!pcbddc->coarse_subassembling) { 8224 if (pcbddc->coarsening_ratio > 1) { 8225 if (multilevel_requested) { 8226 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 8227 } else { 8228 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 8229 } 8230 } else { 8231 PetscMPIInt rank; 8232 8233 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 8234 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 8235 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 8236 } 8237 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8238 PetscInt psum; 8239 if (pcbddc->coarse_ksp) psum = 1; 8240 else psum = 0; 8241 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8242 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8243 } 8244 /* determine if we can go multilevel */ 8245 if (multilevel_requested) { 8246 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8247 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8248 } 8249 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8250 8251 /* dump subassembling pattern */ 8252 if (pcbddc->dbg_flag && multilevel_allowed) { 8253 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 8254 } 8255 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8256 nedcfield = -1; 8257 corners = NULL; 8258 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8259 PetscInt *tidxs,*tidxs2,nout,tsize,i; 8260 const PetscInt *idxs; 8261 ISLocalToGlobalMapping tmap; 8262 8263 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8264 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 8265 /* allocate space for temporary storage */ 8266 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 8267 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 8268 /* allocate for IS array */ 8269 nisdofs = pcbddc->n_ISForDofsLocal; 8270 if (pcbddc->nedclocal) { 8271 if (pcbddc->nedfield > -1) { 8272 nedcfield = pcbddc->nedfield; 8273 } else { 8274 nedcfield = 0; 8275 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs); 8276 nisdofs = 1; 8277 } 8278 } 8279 nisneu = !!pcbddc->NeumannBoundariesLocal; 8280 nisvert = 0; /* nisvert is not used */ 8281 nis = nisdofs + nisneu + nisvert; 8282 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 8283 /* dofs splitting */ 8284 for (i=0;i<nisdofs;i++) { 8285 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 8286 if (nedcfield != i) { 8287 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 8288 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8289 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8290 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8291 } else { 8292 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 8293 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8294 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8295 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout); 8296 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8297 } 8298 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8299 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 8300 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 8301 } 8302 /* neumann boundaries */ 8303 if (pcbddc->NeumannBoundariesLocal) { 8304 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 8305 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 8306 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8307 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8308 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8309 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8310 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 8311 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 8312 } 8313 /* coordinates */ 8314 if (pcbddc->corner_selected) { 8315 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);CHKERRQ(ierr); 8316 ierr = ISGetLocalSize(corners,&tsize);CHKERRQ(ierr); 8317 ierr = ISGetIndices(corners,&idxs);CHKERRQ(ierr); 8318 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8319 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %D != %D",tsize,nout); 8320 ierr = ISRestoreIndices(corners,&idxs);CHKERRQ(ierr); 8321 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);CHKERRQ(ierr); 8322 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8323 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners);CHKERRQ(ierr); 8324 } 8325 ierr = PetscFree(tidxs);CHKERRQ(ierr); 8326 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 8327 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 8328 } else { 8329 nis = 0; 8330 nisdofs = 0; 8331 nisneu = 0; 8332 nisvert = 0; 8333 isarray = NULL; 8334 } 8335 /* destroy no longer needed map */ 8336 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 8337 8338 /* subassemble */ 8339 if (multilevel_allowed) { 8340 Vec vp[1]; 8341 PetscInt nvecs = 0; 8342 PetscBool reuse,reuser; 8343 8344 if (coarse_mat) reuse = PETSC_TRUE; 8345 else reuse = PETSC_FALSE; 8346 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8347 vp[0] = NULL; 8348 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8349 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 8350 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 8351 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 8352 nvecs = 1; 8353 8354 if (pcbddc->divudotp) { 8355 Mat B,loc_divudotp; 8356 Vec v,p; 8357 IS dummy; 8358 PetscInt np; 8359 8360 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 8361 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 8362 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 8363 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 8364 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 8365 ierr = VecSet(p,1.);CHKERRQ(ierr); 8366 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 8367 ierr = VecDestroy(&p);CHKERRQ(ierr); 8368 ierr = MatDestroy(&B);CHKERRQ(ierr); 8369 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 8370 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 8371 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 8372 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 8373 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 8374 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8375 ierr = VecDestroy(&v);CHKERRQ(ierr); 8376 } 8377 } 8378 if (reuser) { 8379 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8380 } else { 8381 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8382 } 8383 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8384 PetscScalar *arraym; 8385 const PetscScalar *arrayv; 8386 PetscInt nl; 8387 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 8388 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 8389 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8390 ierr = VecGetArrayRead(vp[0],&arrayv);CHKERRQ(ierr); 8391 ierr = PetscArraycpy(arraym,arrayv,nl);CHKERRQ(ierr); 8392 ierr = VecRestoreArrayRead(vp[0],&arrayv);CHKERRQ(ierr); 8393 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8394 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 8395 } else { 8396 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 8397 } 8398 } else { 8399 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 8400 } 8401 if (coarse_mat_is || coarse_mat) { 8402 if (!multilevel_allowed) { 8403 ierr = MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 8404 } else { 8405 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8406 if (coarse_mat_is) { 8407 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8408 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 8409 coarse_mat = coarse_mat_is; 8410 } 8411 } 8412 } 8413 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 8414 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 8415 8416 /* create local to global scatters for coarse problem */ 8417 if (compute_vecs) { 8418 PetscInt lrows; 8419 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 8420 if (coarse_mat) { 8421 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 8422 } else { 8423 lrows = 0; 8424 } 8425 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 8426 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 8427 ierr = VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);CHKERRQ(ierr); 8428 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8429 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8430 } 8431 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 8432 8433 /* set defaults for coarse KSP and PC */ 8434 if (multilevel_allowed) { 8435 coarse_ksp_type = KSPRICHARDSON; 8436 coarse_pc_type = PCBDDC; 8437 } else { 8438 coarse_ksp_type = KSPPREONLY; 8439 coarse_pc_type = PCREDUNDANT; 8440 } 8441 8442 /* print some info if requested */ 8443 if (pcbddc->dbg_flag) { 8444 if (!multilevel_allowed) { 8445 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8446 if (multilevel_requested) { 8447 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8448 } else if (pcbddc->max_levels) { 8449 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);CHKERRQ(ierr); 8450 } 8451 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8452 } 8453 } 8454 8455 /* communicate coarse discrete gradient */ 8456 coarseG = NULL; 8457 if (pcbddc->nedcG && multilevel_allowed) { 8458 MPI_Comm ccomm; 8459 if (coarse_mat) { 8460 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8461 } else { 8462 ccomm = MPI_COMM_NULL; 8463 } 8464 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 8465 } 8466 8467 /* create the coarse KSP object only once with defaults */ 8468 if (coarse_mat) { 8469 PetscBool isredundant,isbddc,force,valid; 8470 PetscViewer dbg_viewer = NULL; 8471 8472 if (pcbddc->dbg_flag) { 8473 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8474 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8475 } 8476 if (!pcbddc->coarse_ksp) { 8477 char prefix[256],str_level[16]; 8478 size_t len; 8479 8480 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 8481 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8482 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 8483 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 8484 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8485 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 8486 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 8487 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8488 /* TODO is this logic correct? should check for coarse_mat type */ 8489 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8490 /* prefix */ 8491 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 8492 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 8493 if (!pcbddc->current_level) { 8494 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));CHKERRQ(ierr); 8495 ierr = PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));CHKERRQ(ierr); 8496 } else { 8497 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 8498 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8499 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8500 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8501 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 8502 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 8503 ierr = PetscStrlcat(prefix,str_level,sizeof(prefix));CHKERRQ(ierr); 8504 } 8505 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 8506 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8507 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8508 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8509 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8510 /* allow user customization */ 8511 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 8512 /* get some info after set from options */ 8513 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8514 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8515 force = PETSC_FALSE; 8516 ierr = PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);CHKERRQ(ierr); 8517 ierr = PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");CHKERRQ(ierr); 8518 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8519 if (multilevel_allowed && !force && !valid) { 8520 isbddc = PETSC_TRUE; 8521 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8522 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8523 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8524 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8525 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8526 ierr = PetscObjectOptionsBegin((PetscObject)pc_temp);CHKERRQ(ierr); 8527 ierr = (*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp);CHKERRQ(ierr); 8528 ierr = PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp);CHKERRQ(ierr); 8529 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8530 pc_temp->setfromoptionscalled++; 8531 } 8532 } 8533 } 8534 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8535 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8536 if (nisdofs) { 8537 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 8538 for (i=0;i<nisdofs;i++) { 8539 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8540 } 8541 } 8542 if (nisneu) { 8543 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 8544 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 8545 } 8546 if (nisvert) { 8547 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 8548 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 8549 } 8550 if (coarseG) { 8551 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 8552 } 8553 8554 /* get some info after set from options */ 8555 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8556 8557 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8558 if (isbddc && !multilevel_allowed) { 8559 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8560 } 8561 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8562 force = PETSC_FALSE; 8563 ierr = PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);CHKERRQ(ierr); 8564 ierr = PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");CHKERRQ(ierr); 8565 if (multilevel_requested && multilevel_allowed && !valid && !force) { 8566 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8567 } 8568 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 8569 if (isredundant) { 8570 KSP inner_ksp; 8571 PC inner_pc; 8572 8573 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 8574 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 8575 } 8576 8577 /* parameters which miss an API */ 8578 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8579 if (isbddc) { 8580 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8581 8582 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8583 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8584 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8585 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8586 if (pcbddc_coarse->benign_saddle_point) { 8587 Mat coarsedivudotp_is; 8588 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8589 IS row,col; 8590 const PetscInt *gidxs; 8591 PetscInt n,st,M,N; 8592 8593 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 8594 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 8595 st = st-n; 8596 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 8597 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8598 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8599 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8600 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8601 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8602 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8603 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8604 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8605 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8606 ierr = ISDestroy(&row);CHKERRQ(ierr); 8607 ierr = ISDestroy(&col);CHKERRQ(ierr); 8608 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8609 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8610 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8611 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8612 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8613 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8614 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8615 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8616 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8617 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8618 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8619 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8620 } 8621 } 8622 8623 /* propagate symmetry info of coarse matrix */ 8624 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8625 if (pc->pmat->symmetric_set) { 8626 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8627 } 8628 if (pc->pmat->hermitian_set) { 8629 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8630 } 8631 if (pc->pmat->spd_set) { 8632 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8633 } 8634 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8635 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8636 } 8637 /* set operators */ 8638 ierr = MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");CHKERRQ(ierr); 8639 ierr = MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 8640 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8641 if (pcbddc->dbg_flag) { 8642 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8643 } 8644 } 8645 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8646 ierr = PetscFree(isarray);CHKERRQ(ierr); 8647 #if 0 8648 { 8649 PetscViewer viewer; 8650 char filename[256]; 8651 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8652 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8653 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8654 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8655 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8656 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8657 } 8658 #endif 8659 8660 if (corners) { 8661 Vec gv; 8662 IS is; 8663 const PetscInt *idxs; 8664 PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim; 8665 PetscScalar *coords; 8666 8667 if (!pcbddc->mat_graph->cloc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates"); 8668 ierr = VecGetSize(pcbddc->coarse_vec,&N);CHKERRQ(ierr); 8669 ierr = VecGetLocalSize(pcbddc->coarse_vec,&n);CHKERRQ(ierr); 8670 ierr = VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv);CHKERRQ(ierr); 8671 ierr = VecSetBlockSize(gv,cdim);CHKERRQ(ierr); 8672 ierr = VecSetSizes(gv,n*cdim,N*cdim);CHKERRQ(ierr); 8673 ierr = VecSetType(gv,VECSTANDARD);CHKERRQ(ierr); 8674 ierr = VecSetFromOptions(gv);CHKERRQ(ierr); 8675 ierr = VecSet(gv,PETSC_MAX_REAL);CHKERRQ(ierr); /* we only propagate coordinates from vertices constraints */ 8676 8677 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);CHKERRQ(ierr); 8678 ierr = ISGetLocalSize(is,&n);CHKERRQ(ierr); 8679 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 8680 ierr = PetscMalloc1(n*cdim,&coords);CHKERRQ(ierr); 8681 for (i=0;i<n;i++) { 8682 for (d=0;d<cdim;d++) { 8683 coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d]; 8684 } 8685 } 8686 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 8687 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);CHKERRQ(ierr); 8688 8689 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 8690 ierr = ISGetIndices(corners,&idxs);CHKERRQ(ierr); 8691 ierr = VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES);CHKERRQ(ierr); 8692 ierr = ISRestoreIndices(corners,&idxs);CHKERRQ(ierr); 8693 ierr = PetscFree(coords);CHKERRQ(ierr); 8694 ierr = VecAssemblyBegin(gv);CHKERRQ(ierr); 8695 ierr = VecAssemblyEnd(gv);CHKERRQ(ierr); 8696 ierr = VecGetArray(gv,&coords);CHKERRQ(ierr); 8697 if (pcbddc->coarse_ksp) { 8698 PC coarse_pc; 8699 PetscBool isbddc; 8700 8701 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 8702 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 8703 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8704 PetscReal *realcoords; 8705 8706 ierr = VecGetLocalSize(gv,&n);CHKERRQ(ierr); 8707 #if defined(PETSC_USE_COMPLEX) 8708 ierr = PetscMalloc1(n,&realcoords);CHKERRQ(ierr); 8709 for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]); 8710 #else 8711 realcoords = coords; 8712 #endif 8713 ierr = PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords);CHKERRQ(ierr); 8714 #if defined(PETSC_USE_COMPLEX) 8715 ierr = PetscFree(realcoords);CHKERRQ(ierr); 8716 #endif 8717 } 8718 } 8719 ierr = VecRestoreArray(gv,&coords);CHKERRQ(ierr); 8720 ierr = VecDestroy(&gv);CHKERRQ(ierr); 8721 } 8722 ierr = ISDestroy(&corners);CHKERRQ(ierr); 8723 8724 if (pcbddc->coarse_ksp) { 8725 Vec crhs,csol; 8726 8727 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8728 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8729 if (!csol) { 8730 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8731 } 8732 if (!crhs) { 8733 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8734 } 8735 } 8736 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8737 8738 /* compute null space for coarse solver if the benign trick has been requested */ 8739 if (pcbddc->benign_null) { 8740 8741 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8742 for (i=0;i<pcbddc->benign_n;i++) { 8743 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8744 } 8745 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8746 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8747 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8748 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8749 if (coarse_mat) { 8750 Vec nullv; 8751 PetscScalar *array,*array2; 8752 PetscInt nl; 8753 8754 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8755 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8756 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8757 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8758 ierr = PetscArraycpy(array2,array,nl);CHKERRQ(ierr); 8759 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8760 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8761 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8762 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8763 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8764 } 8765 } 8766 ierr = PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8767 8768 ierr = PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8769 if (pcbddc->coarse_ksp) { 8770 PetscBool ispreonly; 8771 8772 if (CoarseNullSpace) { 8773 PetscBool isnull; 8774 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8775 if (isnull) { 8776 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8777 } 8778 /* TODO: add local nullspaces (if any) */ 8779 } 8780 /* setup coarse ksp */ 8781 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8782 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8783 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8784 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 8785 KSP check_ksp; 8786 KSPType check_ksp_type; 8787 PC check_pc; 8788 Vec check_vec,coarse_vec; 8789 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8790 PetscInt its; 8791 PetscBool compute_eigs; 8792 PetscReal *eigs_r,*eigs_c; 8793 PetscInt neigs; 8794 const char *prefix; 8795 8796 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8797 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8798 ierr = PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);CHKERRQ(ierr); 8799 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 8800 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8801 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8802 /* prevent from setup unneeded object */ 8803 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8804 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8805 if (ispreonly) { 8806 check_ksp_type = KSPPREONLY; 8807 compute_eigs = PETSC_FALSE; 8808 } else { 8809 check_ksp_type = KSPGMRES; 8810 compute_eigs = PETSC_TRUE; 8811 } 8812 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8813 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8814 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8815 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8816 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8817 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8818 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8819 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8820 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8821 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8822 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8823 /* create random vec */ 8824 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8825 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8826 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8827 /* solve coarse problem */ 8828 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8829 ierr = KSPCheckSolve(check_ksp,pc,coarse_vec);CHKERRQ(ierr); 8830 /* set eigenvalue estimation if preonly has not been requested */ 8831 if (compute_eigs) { 8832 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8833 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8834 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8835 if (neigs) { 8836 lambda_max = eigs_r[neigs-1]; 8837 lambda_min = eigs_r[0]; 8838 if (pcbddc->use_coarse_estimates) { 8839 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8840 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8841 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8842 } 8843 } 8844 } 8845 } 8846 8847 /* check coarse problem residual error */ 8848 if (pcbddc->dbg_flag) { 8849 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8850 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8851 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8852 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8853 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8854 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8855 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8856 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8857 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8858 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8859 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8860 if (CoarseNullSpace) { 8861 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8862 } 8863 if (compute_eigs) { 8864 PetscReal lambda_max_s,lambda_min_s; 8865 KSPConvergedReason reason; 8866 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8867 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8868 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8869 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8870 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8871 for (i=0;i<neigs;i++) { 8872 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8873 } 8874 } 8875 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8876 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8877 } 8878 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8879 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8880 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8881 if (compute_eigs) { 8882 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8883 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8884 } 8885 } 8886 } 8887 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8888 /* print additional info */ 8889 if (pcbddc->dbg_flag) { 8890 /* waits until all processes reaches this point */ 8891 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8892 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);CHKERRQ(ierr); 8893 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8894 } 8895 8896 /* free memory */ 8897 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8898 ierr = PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8899 PetscFunctionReturn(0); 8900 } 8901 8902 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8903 { 8904 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8905 PC_IS* pcis = (PC_IS*)pc->data; 8906 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8907 IS subset,subset_mult,subset_n; 8908 PetscInt local_size,coarse_size=0; 8909 PetscInt *local_primal_indices=NULL; 8910 const PetscInt *t_local_primal_indices; 8911 PetscErrorCode ierr; 8912 8913 PetscFunctionBegin; 8914 /* Compute global number of coarse dofs */ 8915 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8916 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8917 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8918 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8919 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8920 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8921 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8922 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8923 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8924 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8925 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8926 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8927 ierr = PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size);CHKERRQ(ierr); 8928 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8929 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8930 8931 /* check numbering */ 8932 if (pcbddc->dbg_flag) { 8933 PetscScalar coarsesum,*array,*array2; 8934 PetscInt i; 8935 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8936 8937 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8938 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8939 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8940 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8941 /* counter */ 8942 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8943 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8944 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8945 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8946 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8947 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8948 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8949 for (i=0;i<pcbddc->local_primal_size;i++) { 8950 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8951 } 8952 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8953 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8954 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8955 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8956 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8957 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8958 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8959 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8960 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8961 for (i=0;i<pcis->n;i++) { 8962 if (array[i] != 0.0 && array[i] != array2[i]) { 8963 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8964 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8965 set_error = PETSC_TRUE; 8966 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8967 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8968 } 8969 } 8970 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8971 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8972 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8973 for (i=0;i<pcis->n;i++) { 8974 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8975 } 8976 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8977 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8978 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8979 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8980 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8981 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8982 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8983 PetscInt *gidxs; 8984 8985 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8986 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8987 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8988 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8989 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8990 for (i=0;i<pcbddc->local_primal_size;i++) { 8991 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8992 } 8993 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8994 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8995 } 8996 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8997 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8998 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8999 } 9000 9001 /* get back data */ 9002 *coarse_size_n = coarse_size; 9003 *local_primal_indices_n = local_primal_indices; 9004 PetscFunctionReturn(0); 9005 } 9006 9007 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 9008 { 9009 IS localis_t; 9010 PetscInt i,lsize,*idxs,n; 9011 PetscScalar *vals; 9012 PetscErrorCode ierr; 9013 9014 PetscFunctionBegin; 9015 /* get indices in local ordering exploiting local to global map */ 9016 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 9017 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 9018 for (i=0;i<lsize;i++) vals[i] = 1.0; 9019 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 9020 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 9021 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 9022 if (idxs) { /* multilevel guard */ 9023 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 9024 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 9025 } 9026 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 9027 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 9028 ierr = PetscFree(vals);CHKERRQ(ierr); 9029 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 9030 /* now compute set in local ordering */ 9031 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9032 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9033 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 9034 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 9035 for (i=0,lsize=0;i<n;i++) { 9036 if (PetscRealPart(vals[i]) > 0.5) { 9037 lsize++; 9038 } 9039 } 9040 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 9041 for (i=0,lsize=0;i<n;i++) { 9042 if (PetscRealPart(vals[i]) > 0.5) { 9043 idxs[lsize++] = i; 9044 } 9045 } 9046 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 9047 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 9048 *localis = localis_t; 9049 PetscFunctionReturn(0); 9050 } 9051 9052 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9053 { 9054 PC_IS *pcis=(PC_IS*)pc->data; 9055 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9056 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 9057 Mat S_j; 9058 PetscInt *used_xadj,*used_adjncy; 9059 PetscBool free_used_adj; 9060 PetscErrorCode ierr; 9061 9062 PetscFunctionBegin; 9063 ierr = PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 9064 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9065 free_used_adj = PETSC_FALSE; 9066 if (pcbddc->sub_schurs_layers == -1) { 9067 used_xadj = NULL; 9068 used_adjncy = NULL; 9069 } else { 9070 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9071 used_xadj = pcbddc->mat_graph->xadj; 9072 used_adjncy = pcbddc->mat_graph->adjncy; 9073 } else if (pcbddc->computed_rowadj) { 9074 used_xadj = pcbddc->mat_graph->xadj; 9075 used_adjncy = pcbddc->mat_graph->adjncy; 9076 } else { 9077 PetscBool flg_row=PETSC_FALSE; 9078 const PetscInt *xadj,*adjncy; 9079 PetscInt nvtxs; 9080 9081 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 9082 if (flg_row) { 9083 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 9084 ierr = PetscArraycpy(used_xadj,xadj,nvtxs+1);CHKERRQ(ierr); 9085 ierr = PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs]);CHKERRQ(ierr); 9086 free_used_adj = PETSC_TRUE; 9087 } else { 9088 pcbddc->sub_schurs_layers = -1; 9089 used_xadj = NULL; 9090 used_adjncy = NULL; 9091 } 9092 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 9093 } 9094 } 9095 9096 /* setup sub_schurs data */ 9097 ierr = MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 9098 if (!sub_schurs->schur_explicit) { 9099 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9100 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 9101 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 9102 } else { 9103 Mat change = NULL; 9104 Vec scaling = NULL; 9105 IS change_primal = NULL, iP; 9106 PetscInt benign_n; 9107 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9108 PetscBool need_change = PETSC_FALSE; 9109 PetscBool discrete_harmonic = PETSC_FALSE; 9110 9111 if (!pcbddc->use_vertices && reuse_solvers) { 9112 PetscInt n_vertices; 9113 9114 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 9115 reuse_solvers = (PetscBool)!n_vertices; 9116 } 9117 if (!pcbddc->benign_change_explicit) { 9118 benign_n = pcbddc->benign_n; 9119 } else { 9120 benign_n = 0; 9121 } 9122 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9123 We need a global reduction to avoid possible deadlocks. 9124 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9125 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9126 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9127 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 9128 need_change = (PetscBool)(!need_change); 9129 } 9130 /* If the user defines additional constraints, we import them here. 9131 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 9132 if (need_change) { 9133 PC_IS *pcisf; 9134 PC_BDDC *pcbddcf; 9135 PC pcf; 9136 9137 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 9138 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 9139 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 9140 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 9141 9142 /* hacks */ 9143 pcisf = (PC_IS*)pcf->data; 9144 pcisf->is_B_local = pcis->is_B_local; 9145 pcisf->vec1_N = pcis->vec1_N; 9146 pcisf->BtoNmap = pcis->BtoNmap; 9147 pcisf->n = pcis->n; 9148 pcisf->n_B = pcis->n_B; 9149 pcbddcf = (PC_BDDC*)pcf->data; 9150 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 9151 pcbddcf->mat_graph = pcbddc->mat_graph; 9152 pcbddcf->use_faces = PETSC_TRUE; 9153 pcbddcf->use_change_of_basis = PETSC_TRUE; 9154 pcbddcf->use_change_on_faces = PETSC_TRUE; 9155 pcbddcf->use_qr_single = PETSC_TRUE; 9156 pcbddcf->fake_change = PETSC_TRUE; 9157 9158 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 9159 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 9160 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 9161 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 9162 change = pcbddcf->ConstraintMatrix; 9163 pcbddcf->ConstraintMatrix = NULL; 9164 9165 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 9166 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 9167 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 9168 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 9169 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 9170 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 9171 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 9172 pcf->ops->destroy = NULL; 9173 pcf->ops->reset = NULL; 9174 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 9175 } 9176 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9177 9178 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 9179 if (iP) { 9180 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 9181 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 9182 ierr = PetscOptionsEnd();CHKERRQ(ierr); 9183 } 9184 if (discrete_harmonic) { 9185 Mat A; 9186 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 9187 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 9188 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 9189 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 9190 ierr = MatDestroy(&A);CHKERRQ(ierr); 9191 } else { 9192 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 9193 } 9194 ierr = MatDestroy(&change);CHKERRQ(ierr); 9195 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 9196 } 9197 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 9198 9199 /* free adjacency */ 9200 if (free_used_adj) { 9201 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 9202 } 9203 ierr = PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 9204 PetscFunctionReturn(0); 9205 } 9206 9207 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9208 { 9209 PC_IS *pcis=(PC_IS*)pc->data; 9210 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9211 PCBDDCGraph graph; 9212 PetscErrorCode ierr; 9213 9214 PetscFunctionBegin; 9215 /* attach interface graph for determining subsets */ 9216 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9217 IS verticesIS,verticescomm; 9218 PetscInt vsize,*idxs; 9219 9220 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 9221 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 9222 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 9223 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 9224 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 9225 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 9226 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 9227 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 9228 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 9229 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 9230 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 9231 } else { 9232 graph = pcbddc->mat_graph; 9233 } 9234 /* print some info */ 9235 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9236 IS vertices; 9237 PetscInt nv,nedges,nfaces; 9238 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 9239 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 9240 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 9241 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9242 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 9243 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);CHKERRQ(ierr); 9244 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);CHKERRQ(ierr); 9245 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);CHKERRQ(ierr); 9246 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 9247 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9248 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 9249 } 9250 9251 /* sub_schurs init */ 9252 if (!pcbddc->sub_schurs) { 9253 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 9254 } 9255 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 9256 9257 /* free graph struct */ 9258 if (pcbddc->sub_schurs_rebuild) { 9259 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 9260 } 9261 PetscFunctionReturn(0); 9262 } 9263 9264 PetscErrorCode PCBDDCCheckOperator(PC pc) 9265 { 9266 PC_IS *pcis=(PC_IS*)pc->data; 9267 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9268 PetscErrorCode ierr; 9269 9270 PetscFunctionBegin; 9271 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 9272 IS zerodiag = NULL; 9273 Mat S_j,B0_B=NULL; 9274 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 9275 PetscScalar *p0_check,*array,*array2; 9276 PetscReal norm; 9277 PetscInt i; 9278 9279 /* B0 and B0_B */ 9280 if (zerodiag) { 9281 IS dummy; 9282 9283 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 9284 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 9285 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 9286 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 9287 } 9288 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 9289 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 9290 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 9291 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9292 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9293 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9294 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9295 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 9296 /* S_j */ 9297 ierr = MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 9298 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 9299 9300 /* mimic vector in \widetilde{W}_\Gamma */ 9301 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 9302 /* continuous in primal space */ 9303 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 9304 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9305 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9306 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9307 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 9308 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 9309 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 9310 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9311 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 9312 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 9313 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9314 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9315 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 9316 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 9317 9318 /* assemble rhs for coarse problem */ 9319 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 9320 /* local with Schur */ 9321 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 9322 if (zerodiag) { 9323 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 9324 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 9325 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 9326 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 9327 } 9328 /* sum on primal nodes the local contributions */ 9329 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9330 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9331 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 9332 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 9333 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 9334 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 9335 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 9336 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 9337 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9338 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9339 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9340 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9341 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9342 /* scale primal nodes (BDDC sums contibutions) */ 9343 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 9344 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 9345 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9346 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 9347 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 9348 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9349 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9350 /* global: \widetilde{B0}_B w_\Gamma */ 9351 if (zerodiag) { 9352 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 9353 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 9354 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 9355 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 9356 } 9357 /* BDDC */ 9358 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 9359 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 9360 9361 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 9362 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 9363 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 9364 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);CHKERRQ(ierr); 9365 for (i=0;i<pcbddc->benign_n;i++) { 9366 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));CHKERRQ(ierr); 9367 } 9368 ierr = PetscFree(p0_check);CHKERRQ(ierr); 9369 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 9370 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 9371 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 9372 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 9373 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 9374 } 9375 PetscFunctionReturn(0); 9376 } 9377 9378 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9379 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9380 { 9381 Mat At; 9382 IS rows; 9383 PetscInt rst,ren; 9384 PetscErrorCode ierr; 9385 PetscLayout rmap; 9386 9387 PetscFunctionBegin; 9388 rst = ren = 0; 9389 if (ccomm != MPI_COMM_NULL) { 9390 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 9391 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 9392 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 9393 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 9394 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 9395 } 9396 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 9397 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 9398 ierr = ISDestroy(&rows);CHKERRQ(ierr); 9399 9400 if (ccomm != MPI_COMM_NULL) { 9401 Mat_MPIAIJ *a,*b; 9402 IS from,to; 9403 Vec gvec; 9404 PetscInt lsize; 9405 9406 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 9407 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 9408 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 9409 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 9410 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 9411 a = (Mat_MPIAIJ*)At->data; 9412 b = (Mat_MPIAIJ*)(*B)->data; 9413 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 9414 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 9415 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 9416 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 9417 b->A = a->A; 9418 b->B = a->B; 9419 9420 b->donotstash = a->donotstash; 9421 b->roworiented = a->roworiented; 9422 b->rowindices = NULL; 9423 b->rowvalues = NULL; 9424 b->getrowactive = PETSC_FALSE; 9425 9426 (*B)->rmap = rmap; 9427 (*B)->factortype = A->factortype; 9428 (*B)->assembled = PETSC_TRUE; 9429 (*B)->insertmode = NOT_SET_VALUES; 9430 (*B)->preallocated = PETSC_TRUE; 9431 9432 if (a->colmap) { 9433 #if defined(PETSC_USE_CTABLE) 9434 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 9435 #else 9436 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 9437 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9438 ierr = PetscArraycpy(b->colmap,a->colmap,At->cmap->N);CHKERRQ(ierr); 9439 #endif 9440 } else b->colmap = NULL; 9441 if (a->garray) { 9442 PetscInt len; 9443 len = a->B->cmap->n; 9444 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 9445 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 9446 if (len) { ierr = PetscArraycpy(b->garray,a->garray,len);CHKERRQ(ierr); } 9447 } else b->garray = NULL; 9448 9449 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 9450 b->lvec = a->lvec; 9451 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 9452 9453 /* cannot use VecScatterCopy */ 9454 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 9455 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 9456 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 9457 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 9458 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 9459 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 9460 ierr = ISDestroy(&from);CHKERRQ(ierr); 9461 ierr = ISDestroy(&to);CHKERRQ(ierr); 9462 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 9463 } 9464 ierr = MatDestroy(&At);CHKERRQ(ierr); 9465 PetscFunctionReturn(0); 9466 } 9467