1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 #if !defined(PETSC_USE_COMPLEX) 18 PetscScalar *uwork,*data,*U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM,bN,lwork,lierr,di = 1; 21 PetscInt ulw,i,nr,nc,n; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 26 if (!nr || !nc) PetscFunctionReturn(0); 27 28 /* workspace */ 29 if (!work) { 30 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 31 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 32 } else { 33 ulw = lw; 34 uwork = work; 35 } 36 n = PetscMin(nr,nc); 37 if (!rwork) { 38 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 39 } else { 40 sing = rwork; 41 } 42 43 /* SVD */ 44 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 48 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 49 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 50 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 51 ierr = PetscFPTrapPop();CHKERRQ(ierr); 52 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 53 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 54 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 55 if (!rwork) { 56 ierr = PetscFree(sing);CHKERRQ(ierr); 57 } 58 if (!work) { 59 ierr = PetscFree(uwork);CHKERRQ(ierr); 60 } 61 /* create B */ 62 if (!range) { 63 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 64 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 65 ierr = PetscArraycpy(data,U+nr*i,(nr-i)*nr);CHKERRQ(ierr); 66 } else { 67 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 68 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 69 ierr = PetscArraycpy(data,U,i*nr);CHKERRQ(ierr); 70 } 71 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscFree(U);CHKERRQ(ierr); 73 #else /* PETSC_USE_COMPLEX */ 74 PetscFunctionBegin; 75 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 76 #endif 77 PetscFunctionReturn(0); 78 } 79 80 /* TODO REMOVE */ 81 #if defined(PRINT_GDET) 82 static int inc = 0; 83 static int lev = 0; 84 #endif 85 86 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 87 { 88 PetscErrorCode ierr; 89 Mat GE,GEd; 90 PetscInt rsize,csize,esize; 91 PetscScalar *ptr; 92 93 PetscFunctionBegin; 94 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 95 if (!esize) PetscFunctionReturn(0); 96 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 97 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 98 99 /* gradients */ 100 ptr = work + 5*esize; 101 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 102 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 103 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 104 ierr = MatDestroy(&GE);CHKERRQ(ierr); 105 106 /* constants */ 107 ptr += rsize*csize; 108 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 109 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 110 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 111 ierr = MatDestroy(&GE);CHKERRQ(ierr); 112 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 113 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 114 115 if (corners) { 116 Mat GEc; 117 const PetscScalar *vals; 118 PetscScalar v; 119 120 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 121 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 122 ierr = MatDenseGetArrayRead(GEd,&vals);CHKERRQ(ierr); 123 /* v = PetscAbsScalar(vals[0]) */; 124 v = 1.; 125 cvals[0] = vals[0]/v; 126 cvals[1] = vals[1]/v; 127 ierr = MatDenseRestoreArrayRead(GEd,&vals);CHKERRQ(ierr); 128 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 129 #if defined(PRINT_GDET) 130 { 131 PetscViewer viewer; 132 char filename[256]; 133 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 134 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 135 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 136 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 137 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 138 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 139 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 140 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 141 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 142 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 143 } 144 #endif 145 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 146 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 147 } 148 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode PCBDDCNedelecSupport(PC pc) 153 { 154 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 155 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 156 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 157 Vec tvec; 158 PetscSF sfv; 159 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 160 MPI_Comm comm; 161 IS lned,primals,allprimals,nedfieldlocal; 162 IS *eedges,*extrows,*extcols,*alleedges; 163 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 164 PetscScalar *vals,*work; 165 PetscReal *rwork; 166 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 167 PetscInt ne,nv,Lv,order,n,field; 168 PetscInt n_neigh,*neigh,*n_shared,**shared; 169 PetscInt i,j,extmem,cum,maxsize,nee; 170 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 171 PetscInt *sfvleaves,*sfvroots; 172 PetscInt *corners,*cedges; 173 PetscInt *ecount,**eneighs,*vcount,**vneighs; 174 #if defined(PETSC_USE_DEBUG) 175 PetscInt *emarks; 176 #endif 177 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 178 PetscErrorCode ierr; 179 180 PetscFunctionBegin; 181 /* If the discrete gradient is defined for a subset of dofs and global is true, 182 it assumes G is given in global ordering for all the dofs. 183 Otherwise, the ordering is global for the Nedelec field */ 184 order = pcbddc->nedorder; 185 conforming = pcbddc->conforming; 186 field = pcbddc->nedfield; 187 global = pcbddc->nedglobal; 188 setprimal = PETSC_FALSE; 189 print = PETSC_FALSE; 190 singular = PETSC_FALSE; 191 192 /* Command line customization */ 193 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 194 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 195 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 196 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 197 /* print debug info TODO: to be removed */ 198 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 199 ierr = PetscOptionsEnd();CHKERRQ(ierr); 200 201 /* Return if there are no edges in the decomposition and the problem is not singular */ 202 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 203 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 204 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 205 if (!singular) { 206 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 207 lrc[0] = PETSC_FALSE; 208 for (i=0;i<n;i++) { 209 if (PetscRealPart(vals[i]) > 2.) { 210 lrc[0] = PETSC_TRUE; 211 break; 212 } 213 } 214 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 215 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 216 if (!lrc[1]) PetscFunctionReturn(0); 217 } 218 219 /* Get Nedelec field */ 220 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal); 221 if (pcbddc->n_ISForDofsLocal && field >= 0) { 222 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 223 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 224 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 225 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 226 ne = n; 227 nedfieldlocal = NULL; 228 global = PETSC_TRUE; 229 } else if (field == PETSC_DECIDE) { 230 PetscInt rst,ren,*idx; 231 232 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 233 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 234 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 235 for (i=rst;i<ren;i++) { 236 PetscInt nc; 237 238 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 239 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 240 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 241 } 242 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 243 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 244 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 245 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 246 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 247 } else { 248 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 249 } 250 251 /* Sanity checks */ 252 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 253 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 254 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order); 255 256 /* Just set primal dofs and return */ 257 if (setprimal) { 258 IS enedfieldlocal; 259 PetscInt *eidxs; 260 261 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 262 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 263 if (nedfieldlocal) { 264 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 265 for (i=0,cum=0;i<ne;i++) { 266 if (PetscRealPart(vals[idxs[i]]) > 2.) { 267 eidxs[cum++] = idxs[i]; 268 } 269 } 270 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 271 } else { 272 for (i=0,cum=0;i<ne;i++) { 273 if (PetscRealPart(vals[i]) > 2.) { 274 eidxs[cum++] = i; 275 } 276 } 277 } 278 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 279 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 280 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 281 ierr = PetscFree(eidxs);CHKERRQ(ierr); 282 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 283 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 /* Compute some l2g maps */ 288 if (nedfieldlocal) { 289 IS is; 290 291 /* need to map from the local Nedelec field to local numbering */ 292 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 293 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 294 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 295 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 296 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 297 if (global) { 298 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 299 el2g = al2g; 300 } else { 301 IS gis; 302 303 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 304 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 305 ierr = ISDestroy(&gis);CHKERRQ(ierr); 306 } 307 ierr = ISDestroy(&is);CHKERRQ(ierr); 308 } else { 309 /* restore default */ 310 pcbddc->nedfield = -1; 311 /* one ref for the destruction of al2g, one for el2g */ 312 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 313 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 314 el2g = al2g; 315 fl2g = NULL; 316 } 317 318 /* Start communication to drop connections for interior edges (for cc analysis only) */ 319 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 320 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 321 if (nedfieldlocal) { 322 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 323 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 324 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 325 } else { 326 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 327 } 328 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 329 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 330 331 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 332 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 333 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 334 if (global) { 335 PetscInt rst; 336 337 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 338 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 339 if (matis->sf_rootdata[i] < 2) { 340 matis->sf_rootdata[cum++] = i + rst; 341 } 342 } 343 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 344 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 345 } else { 346 PetscInt *tbz; 347 348 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 349 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 350 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 351 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 352 for (i=0,cum=0;i<ne;i++) 353 if (matis->sf_leafdata[idxs[i]] == 1) 354 tbz[cum++] = i; 355 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 356 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 357 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 358 ierr = PetscFree(tbz);CHKERRQ(ierr); 359 } 360 } else { /* we need the entire G to infer the nullspace */ 361 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 362 G = pcbddc->discretegradient; 363 } 364 365 /* Extract subdomain relevant rows of G */ 366 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 367 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 368 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 369 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 370 ierr = ISDestroy(&lned);CHKERRQ(ierr); 371 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 372 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 373 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 374 375 /* SF for nodal dofs communications */ 376 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 377 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 378 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 379 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 380 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 381 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 382 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 384 i = singular ? 2 : 1; 385 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 386 387 /* Destroy temporary G created in MATIS format and modified G */ 388 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 389 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 390 ierr = MatDestroy(&G);CHKERRQ(ierr); 391 392 if (print) { 393 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 394 ierr = MatView(lG,NULL);CHKERRQ(ierr); 395 } 396 397 /* Save lG for values insertion in change of basis */ 398 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 399 400 /* Analyze the edge-nodes connections (duplicate lG) */ 401 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 402 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 403 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 404 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 405 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 406 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 407 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 408 /* need to import the boundary specification to ensure the 409 proper detection of coarse edges' endpoints */ 410 if (pcbddc->DirichletBoundariesLocal) { 411 IS is; 412 413 if (fl2g) { 414 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 415 } else { 416 is = pcbddc->DirichletBoundariesLocal; 417 } 418 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 419 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 420 for (i=0;i<cum;i++) { 421 if (idxs[i] >= 0) { 422 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 423 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 424 } 425 } 426 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 427 if (fl2g) { 428 ierr = ISDestroy(&is);CHKERRQ(ierr); 429 } 430 } 431 if (pcbddc->NeumannBoundariesLocal) { 432 IS is; 433 434 if (fl2g) { 435 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 436 } else { 437 is = pcbddc->NeumannBoundariesLocal; 438 } 439 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 440 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 441 for (i=0;i<cum;i++) { 442 if (idxs[i] >= 0) { 443 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 444 } 445 } 446 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 447 if (fl2g) { 448 ierr = ISDestroy(&is);CHKERRQ(ierr); 449 } 450 } 451 452 /* Count neighs per dof */ 453 ierr = ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 454 ierr = ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 455 456 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 457 for proper detection of coarse edges' endpoints */ 458 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 459 for (i=0;i<ne;i++) { 460 if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) { 461 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 462 } 463 } 464 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 465 if (!conforming) { 466 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 467 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 468 } 469 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 470 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 471 cum = 0; 472 for (i=0;i<ne;i++) { 473 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 474 if (!PetscBTLookup(btee,i)) { 475 marks[cum++] = i; 476 continue; 477 } 478 /* set badly connected edge dofs as primal */ 479 if (!conforming) { 480 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 481 marks[cum++] = i; 482 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 483 for (j=ii[i];j<ii[i+1];j++) { 484 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 485 } 486 } else { 487 /* every edge dofs should be connected trough a certain number of nodal dofs 488 to other edge dofs belonging to coarse edges 489 - at most 2 endpoints 490 - order-1 interior nodal dofs 491 - no undefined nodal dofs (nconn < order) 492 */ 493 PetscInt ends = 0,ints = 0, undef = 0; 494 for (j=ii[i];j<ii[i+1];j++) { 495 PetscInt v = jj[j],k; 496 PetscInt nconn = iit[v+1]-iit[v]; 497 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 498 if (nconn > order) ends++; 499 else if (nconn == order) ints++; 500 else undef++; 501 } 502 if (undef || ends > 2 || ints != order -1) { 503 marks[cum++] = i; 504 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 505 for (j=ii[i];j<ii[i+1];j++) { 506 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 507 } 508 } 509 } 510 } 511 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 512 if (!order && ii[i+1] != ii[i]) { 513 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 514 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 515 } 516 } 517 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 518 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 519 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 520 if (!conforming) { 521 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 522 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 523 } 524 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 525 526 /* identify splitpoints and corner candidates */ 527 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 528 if (print) { 529 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 530 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 531 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 532 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 533 } 534 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 535 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 536 for (i=0;i<nv;i++) { 537 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 538 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 539 if (!order) { /* variable order */ 540 PetscReal vorder = 0.; 541 542 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 543 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 544 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test); 545 ord = 1; 546 } 547 #if defined(PETSC_USE_DEBUG) 548 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord); 549 #endif 550 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 551 if (PetscBTLookup(btbd,jj[j])) { 552 bdir = PETSC_TRUE; 553 break; 554 } 555 if (vc != ecount[jj[j]]) { 556 sneighs = PETSC_FALSE; 557 } else { 558 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 559 for (k=0;k<vc;k++) { 560 if (vn[k] != en[k]) { 561 sneighs = PETSC_FALSE; 562 break; 563 } 564 } 565 } 566 } 567 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 568 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir); 569 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 570 } else if (test == ord) { 571 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 572 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i); 573 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 574 } else { 575 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i); 576 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 577 } 578 } 579 } 580 ierr = ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 581 ierr = ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 582 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 583 584 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 585 if (order != 1) { 586 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 587 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 588 for (i=0;i<nv;i++) { 589 if (PetscBTLookup(btvcand,i)) { 590 PetscBool found = PETSC_FALSE; 591 for (j=ii[i];j<ii[i+1] && !found;j++) { 592 PetscInt k,e = jj[j]; 593 if (PetscBTLookup(bte,e)) continue; 594 for (k=iit[e];k<iit[e+1];k++) { 595 PetscInt v = jjt[k]; 596 if (v != i && PetscBTLookup(btvcand,v)) { 597 found = PETSC_TRUE; 598 break; 599 } 600 } 601 } 602 if (!found) { 603 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i); 604 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 605 } else { 606 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i); 607 } 608 } 609 } 610 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 611 } 612 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 613 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 614 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 615 616 /* Get the local G^T explicitly */ 617 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 618 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 619 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 620 621 /* Mark interior nodal dofs */ 622 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 623 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 624 for (i=1;i<n_neigh;i++) { 625 for (j=0;j<n_shared[i];j++) { 626 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 627 } 628 } 629 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 630 631 /* communicate corners and splitpoints */ 632 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 633 ierr = PetscArrayzero(sfvleaves,nv);CHKERRQ(ierr); 634 ierr = PetscArrayzero(sfvroots,Lv);CHKERRQ(ierr); 635 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 636 637 if (print) { 638 IS tbz; 639 640 cum = 0; 641 for (i=0;i<nv;i++) 642 if (sfvleaves[i]) 643 vmarks[cum++] = i; 644 645 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 646 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 647 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 648 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 649 } 650 651 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 652 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 653 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 654 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 655 656 /* Zero rows of lGt corresponding to identified corners 657 and interior nodal dofs */ 658 cum = 0; 659 for (i=0;i<nv;i++) { 660 if (sfvleaves[i]) { 661 vmarks[cum++] = i; 662 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 663 } 664 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 665 } 666 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 667 if (print) { 668 IS tbz; 669 670 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 671 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 672 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 673 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 674 } 675 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 676 ierr = PetscFree(vmarks);CHKERRQ(ierr); 677 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 678 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 679 680 /* Recompute G */ 681 ierr = MatDestroy(&lG);CHKERRQ(ierr); 682 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 683 if (print) { 684 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 685 ierr = MatView(lG,NULL);CHKERRQ(ierr); 686 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 687 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 688 } 689 690 /* Get primal dofs (if any) */ 691 cum = 0; 692 for (i=0;i<ne;i++) { 693 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 694 } 695 if (fl2g) { 696 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 697 } 698 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 699 if (print) { 700 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 701 ierr = ISView(primals,NULL);CHKERRQ(ierr); 702 } 703 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 704 /* TODO: what if the user passed in some of them ? */ 705 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 706 ierr = ISDestroy(&primals);CHKERRQ(ierr); 707 708 /* Compute edge connectivity */ 709 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 710 711 /* Symbolic conn = lG*lGt */ 712 ierr = MatProductCreate(lG,lGt,NULL,&conn);CHKERRQ(ierr); 713 ierr = MatProductSetType(conn,MATPRODUCT_AB);CHKERRQ(ierr); 714 ierr = MatProductSetAlgorithm(conn,"default");CHKERRQ(ierr); 715 ierr = MatProductSetFill(conn,PETSC_DEFAULT);CHKERRQ(ierr); 716 ierr = PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_");CHKERRQ(ierr); 717 ierr = MatProductSetFromOptions(conn);CHKERRQ(ierr); 718 ierr = MatProductSymbolic(conn);CHKERRQ(ierr); 719 720 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 721 if (fl2g) { 722 PetscBT btf; 723 PetscInt *iia,*jja,*iiu,*jju; 724 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 725 726 /* create CSR for all local dofs */ 727 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 728 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 729 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n); 730 iiu = pcbddc->mat_graph->xadj; 731 jju = pcbddc->mat_graph->adjncy; 732 } else if (pcbddc->use_local_adj) { 733 rest = PETSC_TRUE; 734 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 735 } else { 736 free = PETSC_TRUE; 737 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 738 iiu[0] = 0; 739 for (i=0;i<n;i++) { 740 iiu[i+1] = i+1; 741 jju[i] = -1; 742 } 743 } 744 745 /* import sizes of CSR */ 746 iia[0] = 0; 747 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 748 749 /* overwrite entries corresponding to the Nedelec field */ 750 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 751 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 752 for (i=0;i<ne;i++) { 753 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 754 iia[idxs[i]+1] = ii[i+1]-ii[i]; 755 } 756 757 /* iia in CSR */ 758 for (i=0;i<n;i++) iia[i+1] += iia[i]; 759 760 /* jja in CSR */ 761 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 762 for (i=0;i<n;i++) 763 if (!PetscBTLookup(btf,i)) 764 for (j=0;j<iiu[i+1]-iiu[i];j++) 765 jja[iia[i]+j] = jju[iiu[i]+j]; 766 767 /* map edge dofs connectivity */ 768 if (jj) { 769 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 770 for (i=0;i<ne;i++) { 771 PetscInt e = idxs[i]; 772 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 773 } 774 } 775 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 776 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 777 if (rest) { 778 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 779 } 780 if (free) { 781 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 782 } 783 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 784 } else { 785 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 786 } 787 788 /* Analyze interface for edge dofs */ 789 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 790 pcbddc->mat_graph->twodim = PETSC_FALSE; 791 792 /* Get coarse edges in the edge space */ 793 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 794 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 795 796 if (fl2g) { 797 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 798 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 799 for (i=0;i<nee;i++) { 800 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 801 } 802 } else { 803 eedges = alleedges; 804 primals = allprimals; 805 } 806 807 /* Mark fine edge dofs with their coarse edge id */ 808 ierr = PetscArrayzero(marks,ne);CHKERRQ(ierr); 809 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 810 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 811 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 812 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 813 if (print) { 814 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 815 ierr = ISView(primals,NULL);CHKERRQ(ierr); 816 } 817 818 maxsize = 0; 819 for (i=0;i<nee;i++) { 820 PetscInt size,mark = i+1; 821 822 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 823 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 824 for (j=0;j<size;j++) marks[idxs[j]] = mark; 825 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 826 maxsize = PetscMax(maxsize,size); 827 } 828 829 /* Find coarse edge endpoints */ 830 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 831 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 832 for (i=0;i<nee;i++) { 833 PetscInt mark = i+1,size; 834 835 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 836 if (!size && nedfieldlocal) continue; 837 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 838 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 839 if (print) { 840 ierr = PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);CHKERRQ(ierr); 841 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 842 } 843 for (j=0;j<size;j++) { 844 PetscInt k, ee = idxs[j]; 845 if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee); 846 for (k=ii[ee];k<ii[ee+1];k++) { 847 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]); 848 if (PetscBTLookup(btv,jj[k])) { 849 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]); 850 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 851 PetscInt k2; 852 PetscBool corner = PETSC_FALSE; 853 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 854 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 855 /* it's a corner if either is connected with an edge dof belonging to a different cc or 856 if the edge dof lie on the natural part of the boundary */ 857 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 858 corner = PETSC_TRUE; 859 break; 860 } 861 } 862 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 863 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]); 864 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 865 } else { 866 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 867 } 868 } 869 } 870 } 871 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 872 } 873 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 874 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 875 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 876 877 /* Reset marked primal dofs */ 878 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 879 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 880 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 881 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 882 883 /* Now use the initial lG */ 884 ierr = MatDestroy(&lG);CHKERRQ(ierr); 885 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 886 lG = lGinit; 887 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 888 889 /* Compute extended cols indices */ 890 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 891 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 892 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 893 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 894 i *= maxsize; 895 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 896 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 897 eerr = PETSC_FALSE; 898 for (i=0;i<nee;i++) { 899 PetscInt size,found = 0; 900 901 cum = 0; 902 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 903 if (!size && nedfieldlocal) continue; 904 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 905 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 906 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 907 for (j=0;j<size;j++) { 908 PetscInt k,ee = idxs[j]; 909 for (k=ii[ee];k<ii[ee+1];k++) { 910 PetscInt vv = jj[k]; 911 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 912 else if (!PetscBTLookupSet(btvc,vv)) found++; 913 } 914 } 915 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 916 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 917 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 918 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 919 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 920 /* it may happen that endpoints are not defined at this point 921 if it is the case, mark this edge for a second pass */ 922 if (cum != size -1 || found != 2) { 923 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 924 if (print) { 925 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 926 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 927 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 928 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 929 } 930 eerr = PETSC_TRUE; 931 } 932 } 933 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 934 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 935 if (done) { 936 PetscInt *newprimals; 937 938 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 939 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 940 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 941 ierr = PetscArraycpy(newprimals,idxs,cum);CHKERRQ(ierr); 942 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 943 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 944 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr); 945 for (i=0;i<nee;i++) { 946 PetscBool has_candidates = PETSC_FALSE; 947 if (PetscBTLookup(bter,i)) { 948 PetscInt size,mark = i+1; 949 950 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 951 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 952 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 953 for (j=0;j<size;j++) { 954 PetscInt k,ee = idxs[j]; 955 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]); 956 for (k=ii[ee];k<ii[ee+1];k++) { 957 /* set all candidates located on the edge as corners */ 958 if (PetscBTLookup(btvcand,jj[k])) { 959 PetscInt k2,vv = jj[k]; 960 has_candidates = PETSC_TRUE; 961 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv); 962 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 963 /* set all edge dofs connected to candidate as primals */ 964 for (k2=iit[vv];k2<iit[vv+1];k2++) { 965 if (marks[jjt[k2]] == mark) { 966 PetscInt k3,ee2 = jjt[k2]; 967 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2); 968 newprimals[cum++] = ee2; 969 /* finally set the new corners */ 970 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 971 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]); 972 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 973 } 974 } 975 } 976 } else { 977 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]); 978 } 979 } 980 } 981 if (!has_candidates) { /* circular edge */ 982 PetscInt k, ee = idxs[0],*tmarks; 983 984 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 985 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i); 986 for (k=ii[ee];k<ii[ee+1];k++) { 987 PetscInt k2; 988 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]); 989 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 990 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 991 } 992 for (j=0;j<size;j++) { 993 if (tmarks[idxs[j]] > 1) { 994 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]); 995 newprimals[cum++] = idxs[j]; 996 } 997 } 998 ierr = PetscFree(tmarks);CHKERRQ(ierr); 999 } 1000 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1001 } 1002 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1003 } 1004 ierr = PetscFree(extcols);CHKERRQ(ierr); 1005 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1006 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1007 if (fl2g) { 1008 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1009 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1010 for (i=0;i<nee;i++) { 1011 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1012 } 1013 ierr = PetscFree(eedges);CHKERRQ(ierr); 1014 } 1015 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1016 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1017 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1018 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1019 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1020 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1021 pcbddc->mat_graph->twodim = PETSC_FALSE; 1022 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1023 if (fl2g) { 1024 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1025 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1026 for (i=0;i<nee;i++) { 1027 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1028 } 1029 } else { 1030 eedges = alleedges; 1031 primals = allprimals; 1032 } 1033 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1034 1035 /* Mark again */ 1036 ierr = PetscArrayzero(marks,ne);CHKERRQ(ierr); 1037 for (i=0;i<nee;i++) { 1038 PetscInt size,mark = i+1; 1039 1040 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1041 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1042 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1043 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1044 } 1045 if (print) { 1046 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1047 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1048 } 1049 1050 /* Recompute extended cols */ 1051 eerr = PETSC_FALSE; 1052 for (i=0;i<nee;i++) { 1053 PetscInt size; 1054 1055 cum = 0; 1056 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1057 if (!size && nedfieldlocal) continue; 1058 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1059 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1060 for (j=0;j<size;j++) { 1061 PetscInt k,ee = idxs[j]; 1062 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1063 } 1064 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1065 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1066 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1067 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1068 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1069 if (cum != size -1) { 1070 if (print) { 1071 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1072 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1073 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1074 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1075 } 1076 eerr = PETSC_TRUE; 1077 } 1078 } 1079 } 1080 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1081 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1082 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1083 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1084 /* an error should not occur at this point */ 1085 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1086 1087 /* Check the number of endpoints */ 1088 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1089 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1090 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1091 for (i=0;i<nee;i++) { 1092 PetscInt size, found = 0, gc[2]; 1093 1094 /* init with defaults */ 1095 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1096 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1097 if (!size && nedfieldlocal) continue; 1098 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1099 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1100 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1101 for (j=0;j<size;j++) { 1102 PetscInt k,ee = idxs[j]; 1103 for (k=ii[ee];k<ii[ee+1];k++) { 1104 PetscInt vv = jj[k]; 1105 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1106 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i); 1107 corners[i*2+found++] = vv; 1108 } 1109 } 1110 } 1111 if (found != 2) { 1112 PetscInt e; 1113 if (fl2g) { 1114 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1115 } else { 1116 e = idxs[0]; 1117 } 1118 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]); 1119 } 1120 1121 /* get primal dof index on this coarse edge */ 1122 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1123 if (gc[0] > gc[1]) { 1124 PetscInt swap = corners[2*i]; 1125 corners[2*i] = corners[2*i+1]; 1126 corners[2*i+1] = swap; 1127 } 1128 cedges[i] = idxs[size-1]; 1129 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1130 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1131 } 1132 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1133 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1134 1135 #if defined(PETSC_USE_DEBUG) 1136 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1137 not interfere with neighbouring coarse edges */ 1138 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1139 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1140 for (i=0;i<nv;i++) { 1141 PetscInt emax = 0,eemax = 0; 1142 1143 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1144 ierr = PetscArrayzero(emarks,nee+1);CHKERRQ(ierr); 1145 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1146 for (j=1;j<nee+1;j++) { 1147 if (emax < emarks[j]) { 1148 emax = emarks[j]; 1149 eemax = j; 1150 } 1151 } 1152 /* not relevant for edges */ 1153 if (!eemax) continue; 1154 1155 for (j=ii[i];j<ii[i+1];j++) { 1156 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1157 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]); 1158 } 1159 } 1160 } 1161 ierr = PetscFree(emarks);CHKERRQ(ierr); 1162 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1163 #endif 1164 1165 /* Compute extended rows indices for edge blocks of the change of basis */ 1166 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1167 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1168 extmem *= maxsize; 1169 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1170 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1171 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1172 for (i=0;i<nv;i++) { 1173 PetscInt mark = 0,size,start; 1174 1175 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1176 for (j=ii[i];j<ii[i+1];j++) 1177 if (marks[jj[j]] && !mark) 1178 mark = marks[jj[j]]; 1179 1180 /* not relevant */ 1181 if (!mark) continue; 1182 1183 /* import extended row */ 1184 mark--; 1185 start = mark*extmem+extrowcum[mark]; 1186 size = ii[i+1]-ii[i]; 1187 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem); 1188 ierr = PetscArraycpy(extrow+start,jj+ii[i],size);CHKERRQ(ierr); 1189 extrowcum[mark] += size; 1190 } 1191 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1192 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1193 ierr = PetscFree(marks);CHKERRQ(ierr); 1194 1195 /* Compress extrows */ 1196 cum = 0; 1197 for (i=0;i<nee;i++) { 1198 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1199 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1200 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1201 cum = PetscMax(cum,size); 1202 } 1203 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1204 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1205 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1206 1207 /* Workspace for lapack inner calls and VecSetValues */ 1208 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1209 1210 /* Create change of basis matrix (preallocation can be improved) */ 1211 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1212 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1213 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1214 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1215 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1216 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1217 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1218 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1219 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1220 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1221 1222 /* Defaults to identity */ 1223 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1224 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1225 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1226 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1227 1228 /* Create discrete gradient for the coarser level if needed */ 1229 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1230 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1231 if (pcbddc->current_level < pcbddc->max_levels) { 1232 ISLocalToGlobalMapping cel2g,cvl2g; 1233 IS wis,gwis; 1234 PetscInt cnv,cne; 1235 1236 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1237 if (fl2g) { 1238 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1239 } else { 1240 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1241 pcbddc->nedclocal = wis; 1242 } 1243 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1244 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1245 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1246 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1247 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1248 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1249 1250 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1251 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1252 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1253 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1254 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1255 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1256 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1257 1258 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1259 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1260 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1261 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1262 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1263 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1264 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1265 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1266 } 1267 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1268 1269 #if defined(PRINT_GDET) 1270 inc = 0; 1271 lev = pcbddc->current_level; 1272 #endif 1273 1274 /* Insert values in the change of basis matrix */ 1275 for (i=0;i<nee;i++) { 1276 Mat Gins = NULL, GKins = NULL; 1277 IS cornersis = NULL; 1278 PetscScalar cvals[2]; 1279 1280 if (pcbddc->nedcG) { 1281 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1282 } 1283 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1284 if (Gins && GKins) { 1285 const PetscScalar *data; 1286 const PetscInt *rows,*cols; 1287 PetscInt nrh,nch,nrc,ncc; 1288 1289 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1290 /* H1 */ 1291 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1292 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1293 ierr = MatDenseGetArrayRead(Gins,&data);CHKERRQ(ierr); 1294 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1295 ierr = MatDenseRestoreArrayRead(Gins,&data);CHKERRQ(ierr); 1296 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1297 /* complement */ 1298 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1299 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i); 1300 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i); 1301 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc); 1302 ierr = MatDenseGetArrayRead(GKins,&data);CHKERRQ(ierr); 1303 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1304 ierr = MatDenseRestoreArrayRead(GKins,&data);CHKERRQ(ierr); 1305 1306 /* coarse discrete gradient */ 1307 if (pcbddc->nedcG) { 1308 PetscInt cols[2]; 1309 1310 cols[0] = 2*i; 1311 cols[1] = 2*i+1; 1312 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1313 } 1314 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1315 } 1316 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1317 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1318 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1319 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1320 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1321 } 1322 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1323 1324 /* Start assembling */ 1325 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1326 if (pcbddc->nedcG) { 1327 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1328 } 1329 1330 /* Free */ 1331 if (fl2g) { 1332 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1333 for (i=0;i<nee;i++) { 1334 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1335 } 1336 ierr = PetscFree(eedges);CHKERRQ(ierr); 1337 } 1338 1339 /* hack mat_graph with primal dofs on the coarse edges */ 1340 { 1341 PCBDDCGraph graph = pcbddc->mat_graph; 1342 PetscInt *oqueue = graph->queue; 1343 PetscInt *ocptr = graph->cptr; 1344 PetscInt ncc,*idxs; 1345 1346 /* find first primal edge */ 1347 if (pcbddc->nedclocal) { 1348 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1349 } else { 1350 if (fl2g) { 1351 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1352 } 1353 idxs = cedges; 1354 } 1355 cum = 0; 1356 while (cum < nee && cedges[cum] < 0) cum++; 1357 1358 /* adapt connected components */ 1359 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1360 graph->cptr[0] = 0; 1361 for (i=0,ncc=0;i<graph->ncc;i++) { 1362 PetscInt lc = ocptr[i+1]-ocptr[i]; 1363 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1364 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1365 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1366 ncc++; 1367 lc--; 1368 cum++; 1369 while (cum < nee && cedges[cum] < 0) cum++; 1370 } 1371 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1372 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1373 ncc++; 1374 } 1375 graph->ncc = ncc; 1376 if (pcbddc->nedclocal) { 1377 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1378 } 1379 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1380 } 1381 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1382 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1383 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1384 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1385 1386 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1387 ierr = PetscFree(extrow);CHKERRQ(ierr); 1388 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1389 ierr = PetscFree(corners);CHKERRQ(ierr); 1390 ierr = PetscFree(cedges);CHKERRQ(ierr); 1391 ierr = PetscFree(extrows);CHKERRQ(ierr); 1392 ierr = PetscFree(extcols);CHKERRQ(ierr); 1393 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1394 1395 /* Complete assembling */ 1396 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1397 if (pcbddc->nedcG) { 1398 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1399 #if 0 1400 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1401 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1402 #endif 1403 } 1404 1405 /* set change of basis */ 1406 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1407 ierr = MatDestroy(&T);CHKERRQ(ierr); 1408 1409 PetscFunctionReturn(0); 1410 } 1411 1412 /* the near-null space of BDDC carries information on quadrature weights, 1413 and these can be collinear -> so cheat with MatNullSpaceCreate 1414 and create a suitable set of basis vectors first */ 1415 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1416 { 1417 PetscErrorCode ierr; 1418 PetscInt i; 1419 1420 PetscFunctionBegin; 1421 for (i=0;i<nvecs;i++) { 1422 PetscInt first,last; 1423 1424 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1425 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1426 if (i>=first && i < last) { 1427 PetscScalar *data; 1428 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1429 if (!has_const) { 1430 data[i-first] = 1.; 1431 } else { 1432 data[2*i-first] = 1./PetscSqrtReal(2.); 1433 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1434 } 1435 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1436 } 1437 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1438 } 1439 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1440 for (i=0;i<nvecs;i++) { /* reset vectors */ 1441 PetscInt first,last; 1442 ierr = VecLockReadPop(quad_vecs[i]);CHKERRQ(ierr); 1443 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1444 if (i>=first && i < last) { 1445 PetscScalar *data; 1446 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1447 if (!has_const) { 1448 data[i-first] = 0.; 1449 } else { 1450 data[2*i-first] = 0.; 1451 data[2*i-first+1] = 0.; 1452 } 1453 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1454 } 1455 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1456 ierr = VecLockReadPush(quad_vecs[i]);CHKERRQ(ierr); 1457 } 1458 PetscFunctionReturn(0); 1459 } 1460 1461 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1462 { 1463 Mat loc_divudotp; 1464 Vec p,v,vins,quad_vec,*quad_vecs; 1465 ISLocalToGlobalMapping map; 1466 PetscScalar *vals; 1467 const PetscScalar *array; 1468 PetscInt i,maxneighs = 0,maxsize,*gidxs; 1469 PetscInt n_neigh,*neigh,*n_shared,**shared; 1470 PetscMPIInt rank; 1471 PetscErrorCode ierr; 1472 1473 PetscFunctionBegin; 1474 ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1475 for (i=0;i<n_neigh;i++) maxneighs = PetscMax(graph->count[shared[i][0]]+1,maxneighs); 1476 ierr = MPIU_Allreduce(MPI_IN_PLACE,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1477 if (!maxneighs) { 1478 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1479 *nnsp = NULL; 1480 PetscFunctionReturn(0); 1481 } 1482 maxsize = 0; 1483 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1484 ierr = PetscMalloc2(maxsize,&gidxs,maxsize,&vals);CHKERRQ(ierr); 1485 /* create vectors to hold quadrature weights */ 1486 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1487 if (!transpose) { 1488 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1489 } else { 1490 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1491 } 1492 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1493 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1494 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1495 for (i=0;i<maxneighs;i++) { 1496 ierr = VecLockReadPop(quad_vecs[i]);CHKERRQ(ierr); 1497 } 1498 1499 /* compute local quad vec */ 1500 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1501 if (!transpose) { 1502 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1503 } else { 1504 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1505 } 1506 ierr = VecSet(p,1.);CHKERRQ(ierr); 1507 if (!transpose) { 1508 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1509 } else { 1510 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1511 } 1512 if (vl2l) { 1513 Mat lA; 1514 VecScatter sc; 1515 1516 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1517 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1518 ierr = VecScatterCreate(v,NULL,vins,vl2l,&sc);CHKERRQ(ierr); 1519 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1520 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1521 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1522 } else { 1523 vins = v; 1524 } 1525 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1526 ierr = VecDestroy(&p);CHKERRQ(ierr); 1527 1528 /* insert in global quadrature vecs */ 1529 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1530 for (i=1;i<n_neigh;i++) { 1531 const PetscInt *idxs; 1532 PetscInt idx,nn,j; 1533 1534 idxs = shared[i]; 1535 nn = n_shared[i]; 1536 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1537 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1538 idx = -(idx+1); 1539 if (idx < 0 || idx >= maxneighs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid index %D not in [0,%D)",idx,maxneighs); 1540 ierr = ISLocalToGlobalMappingApply(map,nn,idxs,gidxs);CHKERRQ(ierr); 1541 ierr = VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1542 } 1543 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1544 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1545 if (vl2l) { 1546 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1547 } 1548 ierr = VecDestroy(&v);CHKERRQ(ierr); 1549 ierr = PetscFree2(gidxs,vals);CHKERRQ(ierr); 1550 1551 /* assemble near null space */ 1552 for (i=0;i<maxneighs;i++) { 1553 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1554 } 1555 for (i=0;i<maxneighs;i++) { 1556 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1557 ierr = VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");CHKERRQ(ierr); 1558 ierr = VecLockReadPush(quad_vecs[i]);CHKERRQ(ierr); 1559 } 1560 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1561 PetscFunctionReturn(0); 1562 } 1563 1564 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1565 { 1566 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1567 PetscErrorCode ierr; 1568 1569 PetscFunctionBegin; 1570 if (primalv) { 1571 if (pcbddc->user_primal_vertices_local) { 1572 IS list[2], newp; 1573 1574 list[0] = primalv; 1575 list[1] = pcbddc->user_primal_vertices_local; 1576 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1577 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1578 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1579 pcbddc->user_primal_vertices_local = newp; 1580 } else { 1581 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1582 } 1583 } 1584 PetscFunctionReturn(0); 1585 } 1586 1587 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1588 { 1589 PetscInt f, *comp = (PetscInt *)ctx; 1590 1591 PetscFunctionBegin; 1592 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1593 PetscFunctionReturn(0); 1594 } 1595 1596 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1597 { 1598 PetscErrorCode ierr; 1599 Vec local,global; 1600 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1601 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1602 PetscBool monolithic = PETSC_FALSE; 1603 1604 PetscFunctionBegin; 1605 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1606 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1607 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1608 /* need to convert from global to local topology information and remove references to information in global ordering */ 1609 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1610 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1611 ierr = VecBindToCPU(global,PETSC_TRUE);CHKERRQ(ierr); 1612 ierr = VecBindToCPU(local,PETSC_TRUE);CHKERRQ(ierr); 1613 if (monolithic) { /* just get block size to properly compute vertices */ 1614 if (pcbddc->vertex_size == 1) { 1615 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1616 } 1617 goto boundary; 1618 } 1619 1620 if (pcbddc->user_provided_isfordofs) { 1621 if (pcbddc->n_ISForDofs) { 1622 PetscInt i; 1623 1624 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1625 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1626 PetscInt bs; 1627 1628 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1629 ierr = ISGetBlockSize(pcbddc->ISForDofs[i],&bs);CHKERRQ(ierr); 1630 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1631 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1632 } 1633 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1634 pcbddc->n_ISForDofs = 0; 1635 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1636 } 1637 } else { 1638 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1639 DM dm; 1640 1641 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1642 if (!dm) { 1643 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1644 } 1645 if (dm) { 1646 IS *fields; 1647 PetscInt nf,i; 1648 1649 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1650 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1651 for (i=0;i<nf;i++) { 1652 PetscInt bs; 1653 1654 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1655 ierr = ISGetBlockSize(fields[i],&bs);CHKERRQ(ierr); 1656 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1657 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1658 } 1659 ierr = PetscFree(fields);CHKERRQ(ierr); 1660 pcbddc->n_ISForDofsLocal = nf; 1661 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1662 PetscContainer c; 1663 1664 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1665 if (c) { 1666 MatISLocalFields lf; 1667 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1668 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1669 } else { /* fallback, create the default fields if bs > 1 */ 1670 PetscInt i, n = matis->A->rmap->n; 1671 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1672 if (i > 1) { 1673 pcbddc->n_ISForDofsLocal = i; 1674 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1675 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1676 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1677 } 1678 } 1679 } 1680 } 1681 } else { 1682 PetscInt i; 1683 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1684 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1685 } 1686 } 1687 } 1688 1689 boundary: 1690 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1691 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1692 } else if (pcbddc->DirichletBoundariesLocal) { 1693 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1694 } 1695 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1696 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1697 } else if (pcbddc->NeumannBoundariesLocal) { 1698 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1699 } 1700 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1701 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1702 } 1703 ierr = VecDestroy(&global);CHKERRQ(ierr); 1704 ierr = VecDestroy(&local);CHKERRQ(ierr); 1705 /* detect local disconnected subdomains if requested (use matis->A) */ 1706 if (pcbddc->detect_disconnected) { 1707 IS primalv = NULL; 1708 PetscInt i; 1709 PetscBool filter = pcbddc->detect_disconnected_filter; 1710 1711 for (i=0;i<pcbddc->n_local_subs;i++) { 1712 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1713 } 1714 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1715 ierr = PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1716 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1717 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1718 } 1719 /* early stage corner detection */ 1720 { 1721 DM dm; 1722 1723 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1724 if (!dm) { 1725 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1726 } 1727 if (dm) { 1728 PetscBool isda; 1729 1730 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1731 if (isda) { 1732 ISLocalToGlobalMapping l2l; 1733 IS corners; 1734 Mat lA; 1735 PetscBool gl,lo; 1736 1737 { 1738 Vec cvec; 1739 const PetscScalar *coords; 1740 PetscInt dof,n,cdim; 1741 PetscBool memc = PETSC_TRUE; 1742 1743 ierr = DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 1744 ierr = DMGetCoordinates(dm,&cvec);CHKERRQ(ierr); 1745 ierr = VecGetLocalSize(cvec,&n);CHKERRQ(ierr); 1746 ierr = VecGetBlockSize(cvec,&cdim);CHKERRQ(ierr); 1747 n /= cdim; 1748 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 1749 ierr = PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords);CHKERRQ(ierr); 1750 ierr = VecGetArrayRead(cvec,&coords);CHKERRQ(ierr); 1751 #if defined(PETSC_USE_COMPLEX) 1752 memc = PETSC_FALSE; 1753 #endif 1754 if (dof != 1) memc = PETSC_FALSE; 1755 if (memc) { 1756 ierr = PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof);CHKERRQ(ierr); 1757 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1758 PetscReal *bcoords = pcbddc->mat_graph->coords; 1759 PetscInt i, b, d; 1760 1761 for (i=0;i<n;i++) { 1762 for (b=0;b<dof;b++) { 1763 for (d=0;d<cdim;d++) { 1764 bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]); 1765 } 1766 } 1767 } 1768 } 1769 ierr = VecRestoreArrayRead(cvec,&coords);CHKERRQ(ierr); 1770 pcbddc->mat_graph->cdim = cdim; 1771 pcbddc->mat_graph->cnloc = dof*n; 1772 pcbddc->mat_graph->cloc = PETSC_FALSE; 1773 } 1774 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1775 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1776 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1777 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1778 lo = (PetscBool)(l2l && corners); 1779 ierr = MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1780 if (gl) { /* From PETSc's DMDA */ 1781 const PetscInt *idx; 1782 PetscInt dof,bs,*idxout,n; 1783 1784 ierr = DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 1785 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1786 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1787 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1788 if (bs == dof) { 1789 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1790 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1791 } else { /* the original DMDA local-to-local map have been modified */ 1792 PetscInt i,d; 1793 1794 ierr = PetscMalloc1(dof*n,&idxout);CHKERRQ(ierr); 1795 for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d; 1796 ierr = ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);CHKERRQ(ierr); 1797 1798 bs = 1; 1799 n *= dof; 1800 } 1801 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1802 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1803 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1804 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1805 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1806 pcbddc->corner_selected = PETSC_TRUE; 1807 pcbddc->corner_selection = PETSC_TRUE; 1808 } 1809 if (corners) { 1810 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1811 } 1812 } 1813 } 1814 } 1815 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1816 DM dm; 1817 1818 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1819 if (!dm) { 1820 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1821 } 1822 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1823 Vec vcoords; 1824 PetscSection section; 1825 PetscReal *coords; 1826 PetscInt d,cdim,nl,nf,**ctxs; 1827 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1828 1829 ierr = DMGetCoordinateDim(dm,&cdim);CHKERRQ(ierr); 1830 ierr = DMGetLocalSection(dm,§ion);CHKERRQ(ierr); 1831 ierr = PetscSectionGetNumFields(section,&nf);CHKERRQ(ierr); 1832 ierr = DMCreateGlobalVector(dm,&vcoords);CHKERRQ(ierr); 1833 ierr = VecGetLocalSize(vcoords,&nl);CHKERRQ(ierr); 1834 ierr = PetscMalloc1(nl*cdim,&coords);CHKERRQ(ierr); 1835 ierr = PetscMalloc2(nf,&funcs,nf,&ctxs);CHKERRQ(ierr); 1836 ierr = PetscMalloc1(nf,&ctxs[0]);CHKERRQ(ierr); 1837 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1838 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1839 for (d=0;d<cdim;d++) { 1840 PetscInt i; 1841 const PetscScalar *v; 1842 1843 for (i=0;i<nf;i++) ctxs[i][0] = d; 1844 ierr = DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);CHKERRQ(ierr); 1845 ierr = VecGetArrayRead(vcoords,&v);CHKERRQ(ierr); 1846 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1847 ierr = VecRestoreArrayRead(vcoords,&v);CHKERRQ(ierr); 1848 } 1849 ierr = VecDestroy(&vcoords);CHKERRQ(ierr); 1850 ierr = PCSetCoordinates(pc,cdim,nl,coords);CHKERRQ(ierr); 1851 ierr = PetscFree(coords);CHKERRQ(ierr); 1852 ierr = PetscFree(ctxs[0]);CHKERRQ(ierr); 1853 ierr = PetscFree2(funcs,ctxs);CHKERRQ(ierr); 1854 } 1855 } 1856 PetscFunctionReturn(0); 1857 } 1858 1859 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1860 { 1861 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1862 PetscErrorCode ierr; 1863 IS nis; 1864 const PetscInt *idxs; 1865 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1866 PetscBool *ld; 1867 1868 PetscFunctionBegin; 1869 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1870 if (mop == MPI_LAND) { 1871 /* init rootdata with true */ 1872 ld = (PetscBool*) matis->sf_rootdata; 1873 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1874 } else { 1875 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 1876 } 1877 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 1878 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1879 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1880 ld = (PetscBool*) matis->sf_leafdata; 1881 for (i=0;i<nd;i++) 1882 if (-1 < idxs[i] && idxs[i] < n) 1883 ld[idxs[i]] = PETSC_TRUE; 1884 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1885 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1886 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1887 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1888 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1889 if (mop == MPI_LAND) { 1890 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1891 } else { 1892 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1893 } 1894 for (i=0,nnd=0;i<n;i++) 1895 if (ld[i]) 1896 nidxs[nnd++] = i; 1897 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1898 ierr = ISDestroy(is);CHKERRQ(ierr); 1899 *is = nis; 1900 PetscFunctionReturn(0); 1901 } 1902 1903 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1904 { 1905 PC_IS *pcis = (PC_IS*)(pc->data); 1906 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1907 PetscErrorCode ierr; 1908 1909 PetscFunctionBegin; 1910 if (!pcbddc->benign_have_null) { 1911 PetscFunctionReturn(0); 1912 } 1913 if (pcbddc->ChangeOfBasisMatrix) { 1914 Vec swap; 1915 1916 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1917 swap = pcbddc->work_change; 1918 pcbddc->work_change = r; 1919 r = swap; 1920 } 1921 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1922 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1923 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1924 ierr = KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);CHKERRQ(ierr); 1925 ierr = VecSet(z,0.);CHKERRQ(ierr); 1926 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1927 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1928 if (pcbddc->ChangeOfBasisMatrix) { 1929 pcbddc->work_change = r; 1930 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1931 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1932 } 1933 PetscFunctionReturn(0); 1934 } 1935 1936 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1937 { 1938 PCBDDCBenignMatMult_ctx ctx; 1939 PetscErrorCode ierr; 1940 PetscBool apply_right,apply_left,reset_x; 1941 1942 PetscFunctionBegin; 1943 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1944 if (transpose) { 1945 apply_right = ctx->apply_left; 1946 apply_left = ctx->apply_right; 1947 } else { 1948 apply_right = ctx->apply_right; 1949 apply_left = ctx->apply_left; 1950 } 1951 reset_x = PETSC_FALSE; 1952 if (apply_right) { 1953 const PetscScalar *ax; 1954 PetscInt nl,i; 1955 1956 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1957 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1958 ierr = PetscArraycpy(ctx->work,ax,nl);CHKERRQ(ierr); 1959 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1960 for (i=0;i<ctx->benign_n;i++) { 1961 PetscScalar sum,val; 1962 const PetscInt *idxs; 1963 PetscInt nz,j; 1964 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1965 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1966 sum = 0.; 1967 if (ctx->apply_p0) { 1968 val = ctx->work[idxs[nz-1]]; 1969 for (j=0;j<nz-1;j++) { 1970 sum += ctx->work[idxs[j]]; 1971 ctx->work[idxs[j]] += val; 1972 } 1973 } else { 1974 for (j=0;j<nz-1;j++) { 1975 sum += ctx->work[idxs[j]]; 1976 } 1977 } 1978 ctx->work[idxs[nz-1]] -= sum; 1979 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1980 } 1981 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1982 reset_x = PETSC_TRUE; 1983 } 1984 if (transpose) { 1985 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1986 } else { 1987 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1988 } 1989 if (reset_x) { 1990 ierr = VecResetArray(x);CHKERRQ(ierr); 1991 } 1992 if (apply_left) { 1993 PetscScalar *ay; 1994 PetscInt i; 1995 1996 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1997 for (i=0;i<ctx->benign_n;i++) { 1998 PetscScalar sum,val; 1999 const PetscInt *idxs; 2000 PetscInt nz,j; 2001 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2002 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2003 val = -ay[idxs[nz-1]]; 2004 if (ctx->apply_p0) { 2005 sum = 0.; 2006 for (j=0;j<nz-1;j++) { 2007 sum += ay[idxs[j]]; 2008 ay[idxs[j]] += val; 2009 } 2010 ay[idxs[nz-1]] += sum; 2011 } else { 2012 for (j=0;j<nz-1;j++) { 2013 ay[idxs[j]] += val; 2014 } 2015 ay[idxs[nz-1]] = 0.; 2016 } 2017 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2018 } 2019 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 2020 } 2021 PetscFunctionReturn(0); 2022 } 2023 2024 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2025 { 2026 PetscErrorCode ierr; 2027 2028 PetscFunctionBegin; 2029 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 2030 PetscFunctionReturn(0); 2031 } 2032 2033 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2034 { 2035 PetscErrorCode ierr; 2036 2037 PetscFunctionBegin; 2038 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 2039 PetscFunctionReturn(0); 2040 } 2041 2042 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2043 { 2044 PC_IS *pcis = (PC_IS*)pc->data; 2045 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2046 PCBDDCBenignMatMult_ctx ctx; 2047 PetscErrorCode ierr; 2048 2049 PetscFunctionBegin; 2050 if (!restore) { 2051 Mat A_IB,A_BI; 2052 PetscScalar *work; 2053 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2054 2055 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2056 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2057 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2058 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2059 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2060 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2061 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2062 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2063 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2064 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2065 ctx->apply_left = PETSC_TRUE; 2066 ctx->apply_right = PETSC_FALSE; 2067 ctx->apply_p0 = PETSC_FALSE; 2068 ctx->benign_n = pcbddc->benign_n; 2069 if (reuse) { 2070 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2071 ctx->free = PETSC_FALSE; 2072 } else { /* TODO: could be optimized for successive solves */ 2073 ISLocalToGlobalMapping N_to_D; 2074 PetscInt i; 2075 2076 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2077 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2078 for (i=0;i<pcbddc->benign_n;i++) { 2079 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2080 } 2081 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2082 ctx->free = PETSC_TRUE; 2083 } 2084 ctx->A = pcis->A_IB; 2085 ctx->work = work; 2086 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2087 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2088 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2089 pcis->A_IB = A_IB; 2090 2091 /* A_BI as A_IB^T */ 2092 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2093 pcbddc->benign_original_mat = pcis->A_BI; 2094 pcis->A_BI = A_BI; 2095 } else { 2096 if (!pcbddc->benign_original_mat) { 2097 PetscFunctionReturn(0); 2098 } 2099 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2100 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2101 pcis->A_IB = ctx->A; 2102 ctx->A = NULL; 2103 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2104 pcis->A_BI = pcbddc->benign_original_mat; 2105 pcbddc->benign_original_mat = NULL; 2106 if (ctx->free) { 2107 PetscInt i; 2108 for (i=0;i<ctx->benign_n;i++) { 2109 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2110 } 2111 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2112 } 2113 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2114 ierr = PetscFree(ctx);CHKERRQ(ierr); 2115 } 2116 PetscFunctionReturn(0); 2117 } 2118 2119 /* used just in bddc debug mode */ 2120 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2121 { 2122 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2123 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2124 Mat An; 2125 PetscErrorCode ierr; 2126 2127 PetscFunctionBegin; 2128 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2129 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2130 if (is1) { 2131 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2132 ierr = MatDestroy(&An);CHKERRQ(ierr); 2133 } else { 2134 *B = An; 2135 } 2136 PetscFunctionReturn(0); 2137 } 2138 2139 /* TODO: add reuse flag */ 2140 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2141 { 2142 Mat Bt; 2143 PetscScalar *a,*bdata; 2144 const PetscInt *ii,*ij; 2145 PetscInt m,n,i,nnz,*bii,*bij; 2146 PetscBool flg_row; 2147 PetscErrorCode ierr; 2148 2149 PetscFunctionBegin; 2150 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2151 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2152 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2153 nnz = n; 2154 for (i=0;i<ii[n];i++) { 2155 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2156 } 2157 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2158 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2159 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2160 nnz = 0; 2161 bii[0] = 0; 2162 for (i=0;i<n;i++) { 2163 PetscInt j; 2164 for (j=ii[i];j<ii[i+1];j++) { 2165 PetscScalar entry = a[j]; 2166 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2167 bij[nnz] = ij[j]; 2168 bdata[nnz] = entry; 2169 nnz++; 2170 } 2171 } 2172 bii[i+1] = nnz; 2173 } 2174 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2175 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2176 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2177 { 2178 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2179 b->free_a = PETSC_TRUE; 2180 b->free_ij = PETSC_TRUE; 2181 } 2182 if (*B == A) { 2183 ierr = MatDestroy(&A);CHKERRQ(ierr); 2184 } 2185 *B = Bt; 2186 PetscFunctionReturn(0); 2187 } 2188 2189 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv) 2190 { 2191 Mat B = NULL; 2192 DM dm; 2193 IS is_dummy,*cc_n; 2194 ISLocalToGlobalMapping l2gmap_dummy; 2195 PCBDDCGraph graph; 2196 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2197 PetscInt i,n; 2198 PetscInt *xadj,*adjncy; 2199 PetscBool isplex = PETSC_FALSE; 2200 PetscErrorCode ierr; 2201 2202 PetscFunctionBegin; 2203 if (ncc) *ncc = 0; 2204 if (cc) *cc = NULL; 2205 if (primalv) *primalv = NULL; 2206 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2207 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2208 if (!dm) { 2209 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2210 } 2211 if (dm) { 2212 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2213 } 2214 if (filter) isplex = PETSC_FALSE; 2215 2216 if (isplex) { /* this code has been modified from plexpartition.c */ 2217 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2218 PetscInt *adj = NULL; 2219 IS cellNumbering; 2220 const PetscInt *cellNum; 2221 PetscBool useCone, useClosure; 2222 PetscSection section; 2223 PetscSegBuffer adjBuffer; 2224 PetscSF sfPoint; 2225 PetscErrorCode ierr; 2226 2227 PetscFunctionBegin; 2228 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2229 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2230 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2231 /* Build adjacency graph via a section/segbuffer */ 2232 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2233 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2234 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2235 /* Always use FVM adjacency to create partitioner graph */ 2236 ierr = DMGetBasicAdjacency(dm, &useCone, &useClosure);CHKERRQ(ierr); 2237 ierr = DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);CHKERRQ(ierr); 2238 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2239 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2240 for (n = 0, p = pStart; p < pEnd; p++) { 2241 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2242 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2243 adjSize = PETSC_DETERMINE; 2244 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2245 for (a = 0; a < adjSize; ++a) { 2246 const PetscInt point = adj[a]; 2247 if (pStart <= point && point < pEnd) { 2248 PetscInt *PETSC_RESTRICT pBuf; 2249 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2250 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2251 *pBuf = point; 2252 } 2253 } 2254 n++; 2255 } 2256 ierr = DMSetBasicAdjacency(dm, useCone, useClosure);CHKERRQ(ierr); 2257 /* Derive CSR graph from section/segbuffer */ 2258 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2259 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2260 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2261 for (idx = 0, p = pStart; p < pEnd; p++) { 2262 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2263 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2264 } 2265 xadj[n] = size; 2266 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2267 /* Clean up */ 2268 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2269 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2270 ierr = PetscFree(adj);CHKERRQ(ierr); 2271 graph->xadj = xadj; 2272 graph->adjncy = adjncy; 2273 } else { 2274 Mat A; 2275 PetscBool isseqaij, flg_row; 2276 2277 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2278 if (!A->rmap->N || !A->cmap->N) { 2279 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2280 PetscFunctionReturn(0); 2281 } 2282 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2283 if (!isseqaij && filter) { 2284 PetscBool isseqdense; 2285 2286 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2287 if (!isseqdense) { 2288 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2289 } else { /* TODO: rectangular case and LDA */ 2290 PetscScalar *array; 2291 PetscReal chop=1.e-6; 2292 2293 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2294 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2295 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2296 for (i=0;i<n;i++) { 2297 PetscInt j; 2298 for (j=i+1;j<n;j++) { 2299 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2300 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2301 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2302 } 2303 } 2304 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2305 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2306 } 2307 } else { 2308 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2309 B = A; 2310 } 2311 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2312 2313 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2314 if (filter) { 2315 PetscScalar *data; 2316 PetscInt j,cum; 2317 2318 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2319 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2320 cum = 0; 2321 for (i=0;i<n;i++) { 2322 PetscInt t; 2323 2324 for (j=xadj[i];j<xadj[i+1];j++) { 2325 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2326 continue; 2327 } 2328 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2329 } 2330 t = xadj_filtered[i]; 2331 xadj_filtered[i] = cum; 2332 cum += t; 2333 } 2334 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2335 graph->xadj = xadj_filtered; 2336 graph->adjncy = adjncy_filtered; 2337 } else { 2338 graph->xadj = xadj; 2339 graph->adjncy = adjncy; 2340 } 2341 } 2342 /* compute local connected components using PCBDDCGraph */ 2343 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2344 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2345 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2346 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2347 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2348 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2349 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2350 2351 /* partial clean up */ 2352 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2353 if (B) { 2354 PetscBool flg_row; 2355 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2356 ierr = MatDestroy(&B);CHKERRQ(ierr); 2357 } 2358 if (isplex) { 2359 ierr = PetscFree(xadj);CHKERRQ(ierr); 2360 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2361 } 2362 2363 /* get back data */ 2364 if (isplex) { 2365 if (ncc) *ncc = graph->ncc; 2366 if (cc || primalv) { 2367 Mat A; 2368 PetscBT btv,btvt; 2369 PetscSection subSection; 2370 PetscInt *ids,cum,cump,*cids,*pids; 2371 2372 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2373 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2374 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2375 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2376 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2377 2378 cids[0] = 0; 2379 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2380 PetscInt j; 2381 2382 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2383 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2384 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2385 2386 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2387 for (k = 0; k < 2*size; k += 2) { 2388 PetscInt s, pp, p = closure[k], off, dof, cdof; 2389 2390 ierr = PetscSectionGetConstraintDof(subSection,p,&cdof);CHKERRQ(ierr); 2391 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2392 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2393 for (s = 0; s < dof-cdof; s++) { 2394 if (PetscBTLookupSet(btvt,off+s)) continue; 2395 if (!PetscBTLookup(btv,off+s)) { 2396 ids[cum++] = off+s; 2397 } else { /* cross-vertex */ 2398 pids[cump++] = off+s; 2399 } 2400 } 2401 ierr = DMPlexGetTreeParent(dm,p,&pp,NULL);CHKERRQ(ierr); 2402 if (pp != p) { 2403 ierr = PetscSectionGetConstraintDof(subSection,pp,&cdof);CHKERRQ(ierr); 2404 ierr = PetscSectionGetOffset(subSection,pp,&off);CHKERRQ(ierr); 2405 ierr = PetscSectionGetDof(subSection,pp,&dof);CHKERRQ(ierr); 2406 for (s = 0; s < dof-cdof; s++) { 2407 if (PetscBTLookupSet(btvt,off+s)) continue; 2408 if (!PetscBTLookup(btv,off+s)) { 2409 ids[cum++] = off+s; 2410 } else { /* cross-vertex */ 2411 pids[cump++] = off+s; 2412 } 2413 } 2414 } 2415 } 2416 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2417 } 2418 cids[i+1] = cum; 2419 /* mark dofs as already assigned */ 2420 for (j = cids[i]; j < cids[i+1]; j++) { 2421 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2422 } 2423 } 2424 if (cc) { 2425 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2426 for (i = 0; i < graph->ncc; i++) { 2427 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2428 } 2429 *cc = cc_n; 2430 } 2431 if (primalv) { 2432 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2433 } 2434 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2435 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2436 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2437 } 2438 } else { 2439 if (ncc) *ncc = graph->ncc; 2440 if (cc) { 2441 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2442 for (i=0;i<graph->ncc;i++) { 2443 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2444 } 2445 *cc = cc_n; 2446 } 2447 } 2448 /* clean up graph */ 2449 graph->xadj = 0; 2450 graph->adjncy = 0; 2451 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2456 { 2457 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2458 PC_IS* pcis = (PC_IS*)(pc->data); 2459 IS dirIS = NULL; 2460 PetscInt i; 2461 PetscErrorCode ierr; 2462 2463 PetscFunctionBegin; 2464 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2465 if (zerodiag) { 2466 Mat A; 2467 Vec vec3_N; 2468 PetscScalar *vals; 2469 const PetscInt *idxs; 2470 PetscInt nz,*count; 2471 2472 /* p0 */ 2473 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2474 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2475 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2476 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2477 for (i=0;i<nz;i++) vals[i] = 1.; 2478 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2479 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2480 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2481 /* v_I */ 2482 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2483 for (i=0;i<nz;i++) vals[i] = 0.; 2484 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2485 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2486 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2487 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2488 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2489 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2490 if (dirIS) { 2491 PetscInt n; 2492 2493 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2494 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2495 for (i=0;i<n;i++) vals[i] = 0.; 2496 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2497 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2498 } 2499 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2500 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2501 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2502 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2503 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2504 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2505 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2506 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2507 ierr = PetscFree(vals);CHKERRQ(ierr); 2508 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2509 2510 /* there should not be any pressure dofs lying on the interface */ 2511 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2512 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2513 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2514 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2515 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2516 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]); 2517 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2518 ierr = PetscFree(count);CHKERRQ(ierr); 2519 } 2520 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2521 2522 /* check PCBDDCBenignGetOrSetP0 */ 2523 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2524 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2525 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2526 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2527 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2528 for (i=0;i<pcbddc->benign_n;i++) { 2529 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2530 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2531 } 2532 PetscFunctionReturn(0); 2533 } 2534 2535 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2536 { 2537 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2538 IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs; 2539 PetscInt nz,n,benign_n,bsp = 1; 2540 PetscInt *interior_dofs,n_interior_dofs,nneu; 2541 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2542 PetscErrorCode ierr; 2543 2544 PetscFunctionBegin; 2545 if (reuse) goto project_b0; 2546 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2547 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2548 for (n=0;n<pcbddc->benign_n;n++) { 2549 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2550 } 2551 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2552 has_null_pressures = PETSC_TRUE; 2553 have_null = PETSC_TRUE; 2554 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2555 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2556 Checks if all the pressure dofs in each subdomain have a zero diagonal 2557 If not, a change of basis on pressures is not needed 2558 since the local Schur complements are already SPD 2559 */ 2560 if (pcbddc->n_ISForDofsLocal) { 2561 IS iP = NULL; 2562 PetscInt p,*pp; 2563 PetscBool flg; 2564 2565 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp);CHKERRQ(ierr); 2566 n = pcbddc->n_ISForDofsLocal; 2567 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2568 ierr = PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg);CHKERRQ(ierr); 2569 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2570 if (!flg) { 2571 n = 1; 2572 pp[0] = pcbddc->n_ISForDofsLocal-1; 2573 } 2574 2575 bsp = 0; 2576 for (p=0;p<n;p++) { 2577 PetscInt bs; 2578 2579 if (pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]); 2580 ierr = ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);CHKERRQ(ierr); 2581 bsp += bs; 2582 } 2583 ierr = PetscMalloc1(bsp,&bzerodiag);CHKERRQ(ierr); 2584 bsp = 0; 2585 for (p=0;p<n;p++) { 2586 const PetscInt *idxs; 2587 PetscInt b,bs,npl,*bidxs; 2588 2589 ierr = ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);CHKERRQ(ierr); 2590 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl);CHKERRQ(ierr); 2591 ierr = ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);CHKERRQ(ierr); 2592 ierr = PetscMalloc1(npl/bs,&bidxs);CHKERRQ(ierr); 2593 for (b=0;b<bs;b++) { 2594 PetscInt i; 2595 2596 for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b]; 2597 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]);CHKERRQ(ierr); 2598 bsp++; 2599 } 2600 ierr = PetscFree(bidxs);CHKERRQ(ierr); 2601 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);CHKERRQ(ierr); 2602 } 2603 ierr = ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures);CHKERRQ(ierr); 2604 2605 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2606 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2607 if (iP) { 2608 IS newpressures; 2609 2610 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2611 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2612 pressures = newpressures; 2613 } 2614 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2615 if (!sorted) { 2616 ierr = ISSort(pressures);CHKERRQ(ierr); 2617 } 2618 ierr = PetscFree(pp);CHKERRQ(ierr); 2619 } 2620 2621 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2622 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2623 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2624 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2625 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2626 if (!sorted) { 2627 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2628 } 2629 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2630 zerodiag_save = zerodiag; 2631 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2632 if (!nz) { 2633 if (n) have_null = PETSC_FALSE; 2634 has_null_pressures = PETSC_FALSE; 2635 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2636 } 2637 recompute_zerodiag = PETSC_FALSE; 2638 2639 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2640 zerodiag_subs = NULL; 2641 benign_n = 0; 2642 n_interior_dofs = 0; 2643 interior_dofs = NULL; 2644 nneu = 0; 2645 if (pcbddc->NeumannBoundariesLocal) { 2646 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2647 } 2648 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2649 if (checkb) { /* need to compute interior nodes */ 2650 PetscInt n,i,j; 2651 PetscInt n_neigh,*neigh,*n_shared,**shared; 2652 PetscInt *iwork; 2653 2654 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2655 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2656 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2657 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2658 for (i=1;i<n_neigh;i++) 2659 for (j=0;j<n_shared[i];j++) 2660 iwork[shared[i][j]] += 1; 2661 for (i=0;i<n;i++) 2662 if (!iwork[i]) 2663 interior_dofs[n_interior_dofs++] = i; 2664 ierr = PetscFree(iwork);CHKERRQ(ierr); 2665 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2666 } 2667 if (has_null_pressures) { 2668 IS *subs; 2669 PetscInt nsubs,i,j,nl; 2670 const PetscInt *idxs; 2671 PetscScalar *array; 2672 Vec *work; 2673 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2674 2675 subs = pcbddc->local_subs; 2676 nsubs = pcbddc->n_local_subs; 2677 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2678 if (checkb) { 2679 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2680 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2681 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2682 /* work[0] = 1_p */ 2683 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2684 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2685 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2686 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2687 /* work[0] = 1_v */ 2688 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2689 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2690 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2691 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2692 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2693 } 2694 2695 if (nsubs > 1 || bsp > 1) { 2696 IS *is; 2697 PetscInt b,totb; 2698 2699 totb = bsp; 2700 is = bsp > 1 ? bzerodiag : &zerodiag; 2701 nsubs = PetscMax(nsubs,1); 2702 ierr = PetscCalloc1(nsubs*totb,&zerodiag_subs);CHKERRQ(ierr); 2703 for (b=0;b<totb;b++) { 2704 for (i=0;i<nsubs;i++) { 2705 ISLocalToGlobalMapping l2g; 2706 IS t_zerodiag_subs; 2707 PetscInt nl; 2708 2709 if (subs) { 2710 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2711 } else { 2712 IS tis; 2713 2714 ierr = MatGetLocalSize(pcbddc->local_mat,&nl,NULL);CHKERRQ(ierr); 2715 ierr = ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis);CHKERRQ(ierr); 2716 ierr = ISLocalToGlobalMappingCreateIS(tis,&l2g);CHKERRQ(ierr); 2717 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2718 } 2719 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs);CHKERRQ(ierr); 2720 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2721 if (nl) { 2722 PetscBool valid = PETSC_TRUE; 2723 2724 if (checkb) { 2725 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2726 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2727 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2728 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2729 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2730 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2731 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2732 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2733 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2734 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2735 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2736 for (j=0;j<n_interior_dofs;j++) { 2737 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2738 valid = PETSC_FALSE; 2739 break; 2740 } 2741 } 2742 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2743 } 2744 if (valid && nneu) { 2745 const PetscInt *idxs; 2746 PetscInt nzb; 2747 2748 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2749 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2750 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2751 if (nzb) valid = PETSC_FALSE; 2752 } 2753 if (valid && pressures) { 2754 IS t_pressure_subs,tmp; 2755 PetscInt i1,i2; 2756 2757 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2758 ierr = ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp);CHKERRQ(ierr); 2759 ierr = ISGetLocalSize(tmp,&i1);CHKERRQ(ierr); 2760 ierr = ISGetLocalSize(t_zerodiag_subs,&i2);CHKERRQ(ierr); 2761 if (i2 != i1) valid = PETSC_FALSE; 2762 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2763 ierr = ISDestroy(&tmp);CHKERRQ(ierr); 2764 } 2765 if (valid) { 2766 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]);CHKERRQ(ierr); 2767 benign_n++; 2768 } else recompute_zerodiag = PETSC_TRUE; 2769 } 2770 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2771 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2772 } 2773 } 2774 } else { /* there's just one subdomain (or zero if they have not been detected */ 2775 PetscBool valid = PETSC_TRUE; 2776 2777 if (nneu) valid = PETSC_FALSE; 2778 if (valid && pressures) { 2779 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2780 } 2781 if (valid && checkb) { 2782 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2783 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2784 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2785 for (j=0;j<n_interior_dofs;j++) { 2786 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2787 valid = PETSC_FALSE; 2788 break; 2789 } 2790 } 2791 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2792 } 2793 if (valid) { 2794 benign_n = 1; 2795 ierr = PetscMalloc1(benign_n,&zerodiag_subs);CHKERRQ(ierr); 2796 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2797 zerodiag_subs[0] = zerodiag; 2798 } 2799 } 2800 if (checkb) { 2801 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2802 } 2803 } 2804 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2805 2806 if (!benign_n) { 2807 PetscInt n; 2808 2809 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2810 recompute_zerodiag = PETSC_FALSE; 2811 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2812 if (n) have_null = PETSC_FALSE; 2813 } 2814 2815 /* final check for null pressures */ 2816 if (zerodiag && pressures) { 2817 ierr = ISEqual(pressures,zerodiag,&have_null);CHKERRQ(ierr); 2818 } 2819 2820 if (recompute_zerodiag) { 2821 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2822 if (benign_n == 1) { 2823 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2824 zerodiag = zerodiag_subs[0]; 2825 } else { 2826 PetscInt i,nzn,*new_idxs; 2827 2828 nzn = 0; 2829 for (i=0;i<benign_n;i++) { 2830 PetscInt ns; 2831 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2832 nzn += ns; 2833 } 2834 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2835 nzn = 0; 2836 for (i=0;i<benign_n;i++) { 2837 PetscInt ns,*idxs; 2838 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2839 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2840 ierr = PetscArraycpy(new_idxs+nzn,idxs,ns);CHKERRQ(ierr); 2841 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2842 nzn += ns; 2843 } 2844 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2845 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2846 } 2847 have_null = PETSC_FALSE; 2848 } 2849 2850 /* determines if the coarse solver will be singular or not */ 2851 ierr = MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2852 2853 /* Prepare matrix to compute no-net-flux */ 2854 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2855 Mat A,loc_divudotp; 2856 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2857 IS row,col,isused = NULL; 2858 PetscInt M,N,n,st,n_isused; 2859 2860 if (pressures) { 2861 isused = pressures; 2862 } else { 2863 isused = zerodiag_save; 2864 } 2865 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2866 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2867 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2868 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2869 n_isused = 0; 2870 if (isused) { 2871 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2872 } 2873 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2874 st = st-n_isused; 2875 if (n) { 2876 const PetscInt *gidxs; 2877 2878 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2879 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2880 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2881 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2882 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2883 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2884 } else { 2885 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2886 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2887 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2888 } 2889 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2890 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2891 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2892 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2893 ierr = ISDestroy(&row);CHKERRQ(ierr); 2894 ierr = ISDestroy(&col);CHKERRQ(ierr); 2895 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2896 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2897 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2898 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2899 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2900 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2901 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2902 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2903 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2904 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2905 } 2906 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2907 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2908 if (bzerodiag) { 2909 PetscInt i; 2910 2911 for (i=0;i<bsp;i++) { 2912 ierr = ISDestroy(&bzerodiag[i]);CHKERRQ(ierr); 2913 } 2914 ierr = PetscFree(bzerodiag);CHKERRQ(ierr); 2915 } 2916 pcbddc->benign_n = benign_n; 2917 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2918 2919 /* determines if the problem has subdomains with 0 pressure block */ 2920 have_null = (PetscBool)(!!pcbddc->benign_n); 2921 ierr = MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2922 2923 project_b0: 2924 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2925 /* change of basis and p0 dofs */ 2926 if (pcbddc->benign_n) { 2927 PetscInt i,s,*nnz; 2928 2929 /* local change of basis for pressures */ 2930 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2931 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2932 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2933 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2934 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2935 for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */ 2936 for (i=0;i<pcbddc->benign_n;i++) { 2937 const PetscInt *idxs; 2938 PetscInt nzs,j; 2939 2940 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2941 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2942 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2943 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2944 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2945 } 2946 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2947 ierr = MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2948 ierr = PetscFree(nnz);CHKERRQ(ierr); 2949 /* set identity by default */ 2950 for (i=0;i<n;i++) { 2951 ierr = MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES);CHKERRQ(ierr); 2952 } 2953 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2954 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2955 /* set change on pressures */ 2956 for (s=0;s<pcbddc->benign_n;s++) { 2957 PetscScalar *array; 2958 const PetscInt *idxs; 2959 PetscInt nzs; 2960 2961 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2962 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2963 for (i=0;i<nzs-1;i++) { 2964 PetscScalar vals[2]; 2965 PetscInt cols[2]; 2966 2967 cols[0] = idxs[i]; 2968 cols[1] = idxs[nzs-1]; 2969 vals[0] = 1.; 2970 vals[1] = 1.; 2971 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2972 } 2973 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2974 for (i=0;i<nzs-1;i++) array[i] = -1.; 2975 array[nzs-1] = 1.; 2976 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2977 /* store local idxs for p0 */ 2978 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2979 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2980 ierr = PetscFree(array);CHKERRQ(ierr); 2981 } 2982 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2983 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2984 2985 /* project if needed */ 2986 if (pcbddc->benign_change_explicit) { 2987 Mat M; 2988 2989 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2990 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2991 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2992 ierr = MatDestroy(&M);CHKERRQ(ierr); 2993 } 2994 /* store global idxs for p0 */ 2995 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2996 } 2997 *zerodiaglocal = zerodiag; 2998 PetscFunctionReturn(0); 2999 } 3000 3001 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 3002 { 3003 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3004 PetscScalar *array; 3005 PetscErrorCode ierr; 3006 3007 PetscFunctionBegin; 3008 if (!pcbddc->benign_sf) { 3009 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 3010 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 3011 } 3012 if (get) { 3013 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 3014 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 3015 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 3016 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 3017 } else { 3018 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 3019 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 3020 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 3021 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 3022 } 3023 PetscFunctionReturn(0); 3024 } 3025 3026 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3027 { 3028 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3029 PetscErrorCode ierr; 3030 3031 PetscFunctionBegin; 3032 /* TODO: add error checking 3033 - avoid nested pop (or push) calls. 3034 - cannot push before pop. 3035 - cannot call this if pcbddc->local_mat is NULL 3036 */ 3037 if (!pcbddc->benign_n) { 3038 PetscFunctionReturn(0); 3039 } 3040 if (pop) { 3041 if (pcbddc->benign_change_explicit) { 3042 IS is_p0; 3043 MatReuse reuse; 3044 3045 /* extract B_0 */ 3046 reuse = MAT_INITIAL_MATRIX; 3047 if (pcbddc->benign_B0) { 3048 reuse = MAT_REUSE_MATRIX; 3049 } 3050 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 3051 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 3052 /* remove rows and cols from local problem */ 3053 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 3054 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3055 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 3056 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3057 } else { 3058 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3059 PetscScalar *vals; 3060 PetscInt i,n,*idxs_ins; 3061 3062 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 3063 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 3064 if (!pcbddc->benign_B0) { 3065 PetscInt *nnz; 3066 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 3067 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 3068 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 3069 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 3070 for (i=0;i<pcbddc->benign_n;i++) { 3071 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 3072 nnz[i] = n - nnz[i]; 3073 } 3074 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 3075 ierr = MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3076 ierr = PetscFree(nnz);CHKERRQ(ierr); 3077 } 3078 3079 for (i=0;i<pcbddc->benign_n;i++) { 3080 PetscScalar *array; 3081 PetscInt *idxs,j,nz,cum; 3082 3083 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 3084 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3085 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3086 for (j=0;j<nz;j++) vals[j] = 1.; 3087 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3088 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 3089 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 3090 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 3091 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 3092 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 3093 cum = 0; 3094 for (j=0;j<n;j++) { 3095 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3096 vals[cum] = array[j]; 3097 idxs_ins[cum] = j; 3098 cum++; 3099 } 3100 } 3101 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 3102 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 3103 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3104 } 3105 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3106 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3107 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 3108 } 3109 } else { /* push */ 3110 if (pcbddc->benign_change_explicit) { 3111 PetscInt i; 3112 3113 for (i=0;i<pcbddc->benign_n;i++) { 3114 PetscScalar *B0_vals; 3115 PetscInt *B0_cols,B0_ncol; 3116 3117 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3118 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3119 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3120 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 3121 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3122 } 3123 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3124 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3125 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!"); 3126 } 3127 PetscFunctionReturn(0); 3128 } 3129 3130 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3131 { 3132 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3133 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3134 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3135 PetscBLASInt *B_iwork,*B_ifail; 3136 PetscScalar *work,lwork; 3137 PetscScalar *St,*S,*eigv; 3138 PetscScalar *Sarray,*Starray; 3139 PetscReal *eigs,thresh,lthresh,uthresh; 3140 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3141 PetscBool allocated_S_St; 3142 #if defined(PETSC_USE_COMPLEX) 3143 PetscReal *rwork; 3144 #endif 3145 PetscErrorCode ierr; 3146 3147 PetscFunctionBegin; 3148 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3149 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3150 if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3151 ierr = PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3152 3153 if (pcbddc->dbg_flag) { 3154 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3155 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3156 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3157 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3158 } 3159 3160 if (pcbddc->dbg_flag) { 3161 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);CHKERRQ(ierr); 3162 } 3163 3164 /* max size of subsets */ 3165 mss = 0; 3166 for (i=0;i<sub_schurs->n_subs;i++) { 3167 PetscInt subset_size; 3168 3169 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3170 mss = PetscMax(mss,subset_size); 3171 } 3172 3173 /* min/max and threshold */ 3174 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3175 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3176 nmax = PetscMax(nmin,nmax); 3177 allocated_S_St = PETSC_FALSE; 3178 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3179 allocated_S_St = PETSC_TRUE; 3180 } 3181 3182 /* allocate lapack workspace */ 3183 cum = cum2 = 0; 3184 maxneigs = 0; 3185 for (i=0;i<sub_schurs->n_subs;i++) { 3186 PetscInt n,subset_size; 3187 3188 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3189 n = PetscMin(subset_size,nmax); 3190 cum += subset_size; 3191 cum2 += subset_size*n; 3192 maxneigs = PetscMax(maxneigs,n); 3193 } 3194 lwork = 0; 3195 if (mss) { 3196 if (sub_schurs->is_symmetric) { 3197 PetscScalar sdummy = 0.; 3198 PetscBLASInt B_itype = 1; 3199 PetscBLASInt B_N = mss, idummy = 0; 3200 PetscReal rdummy = 0.,zero = 0.0; 3201 PetscReal eps = 0.0; /* dlamch? */ 3202 3203 B_lwork = -1; 3204 /* some implementations may complain about NULL pointers, even if we are querying */ 3205 S = &sdummy; 3206 St = &sdummy; 3207 eigs = &rdummy; 3208 eigv = &sdummy; 3209 B_iwork = &idummy; 3210 B_ifail = &idummy; 3211 #if defined(PETSC_USE_COMPLEX) 3212 rwork = &rdummy; 3213 #endif 3214 thresh = 1.0; 3215 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3216 #if defined(PETSC_USE_COMPLEX) 3217 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3218 #else 3219 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3220 #endif 3221 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3222 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3223 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3224 } 3225 3226 nv = 0; 3227 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3228 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3229 } 3230 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3231 if (allocated_S_St) { 3232 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3233 } 3234 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3235 #if defined(PETSC_USE_COMPLEX) 3236 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3237 #endif 3238 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3239 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3240 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3241 nv+cum,&pcbddc->adaptive_constraints_idxs, 3242 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3243 ierr = PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs);CHKERRQ(ierr); 3244 3245 maxneigs = 0; 3246 cum = cumarray = 0; 3247 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3248 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3249 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3250 const PetscInt *idxs; 3251 3252 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3253 for (cum=0;cum<nv;cum++) { 3254 pcbddc->adaptive_constraints_n[cum] = 1; 3255 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3256 pcbddc->adaptive_constraints_data[cum] = 1.0; 3257 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3258 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3259 } 3260 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3261 } 3262 3263 if (mss) { /* multilevel */ 3264 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3265 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3266 } 3267 3268 lthresh = pcbddc->adaptive_threshold[0]; 3269 uthresh = pcbddc->adaptive_threshold[1]; 3270 for (i=0;i<sub_schurs->n_subs;i++) { 3271 const PetscInt *idxs; 3272 PetscReal upper,lower; 3273 PetscInt j,subset_size,eigs_start = 0; 3274 PetscBLASInt B_N; 3275 PetscBool same_data = PETSC_FALSE; 3276 PetscBool scal = PETSC_FALSE; 3277 3278 if (pcbddc->use_deluxe_scaling) { 3279 upper = PETSC_MAX_REAL; 3280 lower = uthresh; 3281 } else { 3282 if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3283 upper = 1./uthresh; 3284 lower = 0.; 3285 } 3286 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3287 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3288 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3289 /* this is experimental: we assume the dofs have been properly grouped to have 3290 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3291 if (!sub_schurs->is_posdef) { 3292 Mat T; 3293 3294 for (j=0;j<subset_size;j++) { 3295 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3296 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);CHKERRQ(ierr); 3297 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3298 ierr = MatDestroy(&T);CHKERRQ(ierr); 3299 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);CHKERRQ(ierr); 3300 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3301 ierr = MatDestroy(&T);CHKERRQ(ierr); 3302 if (sub_schurs->change_primal_sub) { 3303 PetscInt nz,k; 3304 const PetscInt *idxs; 3305 3306 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);CHKERRQ(ierr); 3307 ierr = ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3308 for (k=0;k<nz;k++) { 3309 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3310 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3311 } 3312 ierr = ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3313 } 3314 scal = PETSC_TRUE; 3315 break; 3316 } 3317 } 3318 } 3319 3320 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3321 if (sub_schurs->is_symmetric) { 3322 PetscInt j,k; 3323 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3324 ierr = PetscArrayzero(S,subset_size*subset_size);CHKERRQ(ierr); 3325 ierr = PetscArrayzero(St,subset_size*subset_size);CHKERRQ(ierr); 3326 } 3327 for (j=0;j<subset_size;j++) { 3328 for (k=j;k<subset_size;k++) { 3329 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3330 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3331 } 3332 } 3333 } else { 3334 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3335 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3336 } 3337 } else { 3338 S = Sarray + cumarray; 3339 St = Starray + cumarray; 3340 } 3341 /* see if we can save some work */ 3342 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3343 ierr = PetscArraycmp(S,St,subset_size*subset_size,&same_data);CHKERRQ(ierr); 3344 } 3345 3346 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3347 B_neigs = 0; 3348 } else { 3349 if (sub_schurs->is_symmetric) { 3350 PetscBLASInt B_itype = 1; 3351 PetscBLASInt B_IL, B_IU; 3352 PetscReal eps = -1.0; /* dlamch? */ 3353 PetscInt nmin_s; 3354 PetscBool compute_range; 3355 3356 B_neigs = 0; 3357 compute_range = (PetscBool)!same_data; 3358 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3359 3360 if (pcbddc->dbg_flag) { 3361 PetscInt nc = 0; 3362 3363 if (sub_schurs->change_primal_sub) { 3364 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);CHKERRQ(ierr); 3365 } 3366 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);CHKERRQ(ierr); 3367 } 3368 3369 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3370 if (compute_range) { 3371 3372 /* ask for eigenvalues larger than thresh */ 3373 if (sub_schurs->is_posdef) { 3374 #if defined(PETSC_USE_COMPLEX) 3375 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3376 #else 3377 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3378 #endif 3379 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3380 } else { /* no theory so far, but it works nicely */ 3381 PetscInt recipe = 0,recipe_m = 1; 3382 PetscReal bb[2]; 3383 3384 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);CHKERRQ(ierr); 3385 switch (recipe) { 3386 case 0: 3387 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3388 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3389 #if defined(PETSC_USE_COMPLEX) 3390 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3391 #else 3392 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3393 #endif 3394 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3395 break; 3396 case 1: 3397 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3398 #if defined(PETSC_USE_COMPLEX) 3399 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3400 #else 3401 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3402 #endif 3403 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3404 if (!scal) { 3405 PetscBLASInt B_neigs2 = 0; 3406 3407 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3408 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3409 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3410 #if defined(PETSC_USE_COMPLEX) 3411 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3412 #else 3413 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3414 #endif 3415 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3416 B_neigs += B_neigs2; 3417 } 3418 break; 3419 case 2: 3420 if (scal) { 3421 bb[0] = PETSC_MIN_REAL; 3422 bb[1] = 0; 3423 #if defined(PETSC_USE_COMPLEX) 3424 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3425 #else 3426 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3427 #endif 3428 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3429 } else { 3430 PetscBLASInt B_neigs2 = 0; 3431 PetscBool import = PETSC_FALSE; 3432 3433 lthresh = PetscMax(lthresh,0.0); 3434 if (lthresh > 0.0) { 3435 bb[0] = PETSC_MIN_REAL; 3436 bb[1] = lthresh*lthresh; 3437 3438 import = PETSC_TRUE; 3439 #if defined(PETSC_USE_COMPLEX) 3440 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3441 #else 3442 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3443 #endif 3444 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3445 } 3446 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3447 bb[1] = PETSC_MAX_REAL; 3448 if (import) { 3449 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3450 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3451 } 3452 #if defined(PETSC_USE_COMPLEX) 3453 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3454 #else 3455 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3456 #endif 3457 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3458 B_neigs += B_neigs2; 3459 } 3460 break; 3461 case 3: 3462 if (scal) { 3463 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);CHKERRQ(ierr); 3464 } else { 3465 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);CHKERRQ(ierr); 3466 } 3467 if (!scal) { 3468 bb[0] = uthresh; 3469 bb[1] = PETSC_MAX_REAL; 3470 #if defined(PETSC_USE_COMPLEX) 3471 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3472 #else 3473 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3474 #endif 3475 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3476 } 3477 if (recipe_m > 0 && B_N - B_neigs > 0) { 3478 PetscBLASInt B_neigs2 = 0; 3479 3480 B_IL = 1; 3481 ierr = PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);CHKERRQ(ierr); 3482 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3483 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3484 #if defined(PETSC_USE_COMPLEX) 3485 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3486 #else 3487 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3488 #endif 3489 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3490 B_neigs += B_neigs2; 3491 } 3492 break; 3493 case 4: 3494 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3495 #if defined(PETSC_USE_COMPLEX) 3496 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3497 #else 3498 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3499 #endif 3500 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3501 { 3502 PetscBLASInt B_neigs2 = 0; 3503 3504 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3505 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3506 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3507 #if defined(PETSC_USE_COMPLEX) 3508 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3509 #else 3510 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3511 #endif 3512 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3513 B_neigs += B_neigs2; 3514 } 3515 break; 3516 case 5: /* same as before: first compute all eigenvalues, then filter */ 3517 #if defined(PETSC_USE_COMPLEX) 3518 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3519 #else 3520 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3521 #endif 3522 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3523 { 3524 PetscInt e,k,ne; 3525 for (e=0,ne=0;e<B_neigs;e++) { 3526 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3527 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3528 eigs[ne] = eigs[e]; 3529 ne++; 3530 } 3531 } 3532 ierr = PetscArraycpy(eigv,S,B_N*ne);CHKERRQ(ierr); 3533 B_neigs = ne; 3534 } 3535 break; 3536 default: 3537 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3538 break; 3539 } 3540 } 3541 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3542 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3543 B_IL = 1; 3544 #if defined(PETSC_USE_COMPLEX) 3545 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3546 #else 3547 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3548 #endif 3549 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3550 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3551 PetscInt k; 3552 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3553 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3554 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3555 nmin = nmax; 3556 ierr = PetscArrayzero(eigv,subset_size*nmax);CHKERRQ(ierr); 3557 for (k=0;k<nmax;k++) { 3558 eigs[k] = 1./PETSC_SMALL; 3559 eigv[k*(subset_size+1)] = 1.0; 3560 } 3561 } 3562 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3563 if (B_ierr) { 3564 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3565 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3566 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3567 } 3568 3569 if (B_neigs > nmax) { 3570 if (pcbddc->dbg_flag) { 3571 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax);CHKERRQ(ierr); 3572 } 3573 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3574 B_neigs = nmax; 3575 } 3576 3577 nmin_s = PetscMin(nmin,B_N); 3578 if (B_neigs < nmin_s) { 3579 PetscBLASInt B_neigs2 = 0; 3580 3581 if (pcbddc->use_deluxe_scaling) { 3582 if (scal) { 3583 B_IU = nmin_s; 3584 B_IL = B_neigs + 1; 3585 } else { 3586 B_IL = B_N - nmin_s + 1; 3587 B_IU = B_N - B_neigs; 3588 } 3589 } else { 3590 B_IL = B_neigs + 1; 3591 B_IU = nmin_s; 3592 } 3593 if (pcbddc->dbg_flag) { 3594 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);CHKERRQ(ierr); 3595 } 3596 if (sub_schurs->is_symmetric) { 3597 PetscInt j,k; 3598 for (j=0;j<subset_size;j++) { 3599 for (k=j;k<subset_size;k++) { 3600 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3601 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3602 } 3603 } 3604 } else { 3605 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3606 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3607 } 3608 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3609 #if defined(PETSC_USE_COMPLEX) 3610 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3611 #else 3612 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3613 #endif 3614 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3615 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3616 B_neigs += B_neigs2; 3617 } 3618 if (B_ierr) { 3619 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3620 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3621 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3622 } 3623 if (pcbddc->dbg_flag) { 3624 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3625 for (j=0;j<B_neigs;j++) { 3626 if (eigs[j] == 0.0) { 3627 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3628 } else { 3629 if (pcbddc->use_deluxe_scaling) { 3630 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3631 } else { 3632 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3633 } 3634 } 3635 } 3636 } 3637 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3638 } 3639 /* change the basis back to the original one */ 3640 if (sub_schurs->change) { 3641 Mat change,phi,phit; 3642 3643 if (pcbddc->dbg_flag > 2) { 3644 PetscInt ii; 3645 for (ii=0;ii<B_neigs;ii++) { 3646 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3647 for (j=0;j<B_N;j++) { 3648 #if defined(PETSC_USE_COMPLEX) 3649 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3650 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3651 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3652 #else 3653 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3654 #endif 3655 } 3656 } 3657 } 3658 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3659 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3660 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3661 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3662 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3663 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3664 } 3665 maxneigs = PetscMax(B_neigs,maxneigs); 3666 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3667 if (B_neigs) { 3668 ierr = PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size);CHKERRQ(ierr); 3669 3670 if (pcbddc->dbg_flag > 1) { 3671 PetscInt ii; 3672 for (ii=0;ii<B_neigs;ii++) { 3673 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3674 for (j=0;j<B_N;j++) { 3675 #if defined(PETSC_USE_COMPLEX) 3676 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3677 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3678 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3679 #else 3680 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3681 #endif 3682 } 3683 } 3684 } 3685 ierr = PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size);CHKERRQ(ierr); 3686 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3687 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3688 cum++; 3689 } 3690 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3691 /* shift for next computation */ 3692 cumarray += subset_size*subset_size; 3693 } 3694 if (pcbddc->dbg_flag) { 3695 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3696 } 3697 3698 if (mss) { 3699 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3700 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3701 /* destroy matrices (junk) */ 3702 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3703 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3704 } 3705 if (allocated_S_St) { 3706 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3707 } 3708 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3709 #if defined(PETSC_USE_COMPLEX) 3710 ierr = PetscFree(rwork);CHKERRQ(ierr); 3711 #endif 3712 if (pcbddc->dbg_flag) { 3713 PetscInt maxneigs_r; 3714 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3715 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);CHKERRQ(ierr); 3716 } 3717 ierr = PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3718 PetscFunctionReturn(0); 3719 } 3720 3721 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3722 { 3723 PetscScalar *coarse_submat_vals; 3724 PetscErrorCode ierr; 3725 3726 PetscFunctionBegin; 3727 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3728 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3729 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3730 3731 /* Setup local neumann solver ksp_R */ 3732 /* PCBDDCSetUpLocalScatters should be called first! */ 3733 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3734 3735 /* 3736 Setup local correction and local part of coarse basis. 3737 Gives back the dense local part of the coarse matrix in column major ordering 3738 */ 3739 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3740 3741 /* Compute total number of coarse nodes and setup coarse solver */ 3742 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3743 3744 /* free */ 3745 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3746 PetscFunctionReturn(0); 3747 } 3748 3749 PetscErrorCode PCBDDCResetCustomization(PC pc) 3750 { 3751 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3752 PetscErrorCode ierr; 3753 3754 PetscFunctionBegin; 3755 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3756 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3757 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3758 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3759 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3760 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3761 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3762 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3763 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3764 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3765 PetscFunctionReturn(0); 3766 } 3767 3768 PetscErrorCode PCBDDCResetTopography(PC pc) 3769 { 3770 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3771 PetscInt i; 3772 PetscErrorCode ierr; 3773 3774 PetscFunctionBegin; 3775 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3776 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3777 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3778 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3779 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3780 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3781 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3782 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3783 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3784 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3785 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3786 for (i=0;i<pcbddc->n_local_subs;i++) { 3787 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3788 } 3789 pcbddc->n_local_subs = 0; 3790 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3791 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3792 pcbddc->graphanalyzed = PETSC_FALSE; 3793 pcbddc->recompute_topography = PETSC_TRUE; 3794 pcbddc->corner_selected = PETSC_FALSE; 3795 PetscFunctionReturn(0); 3796 } 3797 3798 PetscErrorCode PCBDDCResetSolvers(PC pc) 3799 { 3800 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3801 PetscErrorCode ierr; 3802 3803 PetscFunctionBegin; 3804 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3805 if (pcbddc->coarse_phi_B) { 3806 PetscScalar *array; 3807 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3808 ierr = PetscFree(array);CHKERRQ(ierr); 3809 } 3810 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3811 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3812 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3813 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3814 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3815 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3816 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3817 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3818 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3819 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3820 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3821 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3822 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3823 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3824 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3825 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3826 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3827 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3828 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3829 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3830 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3831 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3832 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3833 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3834 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3835 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3836 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3837 if (pcbddc->benign_zerodiag_subs) { 3838 PetscInt i; 3839 for (i=0;i<pcbddc->benign_n;i++) { 3840 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3841 } 3842 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3843 } 3844 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3845 PetscFunctionReturn(0); 3846 } 3847 3848 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3849 { 3850 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3851 PC_IS *pcis = (PC_IS*)pc->data; 3852 VecType impVecType; 3853 PetscInt n_constraints,n_R,old_size; 3854 PetscErrorCode ierr; 3855 3856 PetscFunctionBegin; 3857 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3858 n_R = pcis->n - pcbddc->n_vertices; 3859 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3860 /* local work vectors (try to avoid unneeded work)*/ 3861 /* R nodes */ 3862 old_size = -1; 3863 if (pcbddc->vec1_R) { 3864 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3865 } 3866 if (n_R != old_size) { 3867 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3868 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3869 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3870 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3871 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3872 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3873 } 3874 /* local primal dofs */ 3875 old_size = -1; 3876 if (pcbddc->vec1_P) { 3877 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3878 } 3879 if (pcbddc->local_primal_size != old_size) { 3880 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3881 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3882 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3883 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3884 } 3885 /* local explicit constraints */ 3886 old_size = -1; 3887 if (pcbddc->vec1_C) { 3888 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3889 } 3890 if (n_constraints && n_constraints != old_size) { 3891 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3892 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3893 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3894 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3895 } 3896 PetscFunctionReturn(0); 3897 } 3898 3899 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3900 { 3901 PetscErrorCode ierr; 3902 /* pointers to pcis and pcbddc */ 3903 PC_IS* pcis = (PC_IS*)pc->data; 3904 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3905 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3906 /* submatrices of local problem */ 3907 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3908 /* submatrices of local coarse problem */ 3909 Mat S_VV,S_CV,S_VC,S_CC; 3910 /* working matrices */ 3911 Mat C_CR; 3912 /* additional working stuff */ 3913 PC pc_R; 3914 Mat F,Brhs = NULL; 3915 Vec dummy_vec; 3916 PetscBool isLU,isCHOL,need_benign_correction,sparserhs; 3917 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3918 PetscScalar *work; 3919 PetscInt *idx_V_B; 3920 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3921 PetscInt i,n_R,n_D,n_B; 3922 PetscScalar one=1.0,m_one=-1.0; 3923 3924 PetscFunctionBegin; 3925 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3926 ierr = PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3927 3928 /* Set Non-overlapping dimensions */ 3929 n_vertices = pcbddc->n_vertices; 3930 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3931 n_B = pcis->n_B; 3932 n_D = pcis->n - n_B; 3933 n_R = pcis->n - n_vertices; 3934 3935 /* vertices in boundary numbering */ 3936 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3937 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3938 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i); 3939 3940 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3941 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3942 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3943 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3944 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3945 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3946 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3947 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3948 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3949 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3950 3951 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3952 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3953 ierr = PCSetUp(pc_R);CHKERRQ(ierr); 3954 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3955 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3956 lda_rhs = n_R; 3957 need_benign_correction = PETSC_FALSE; 3958 if (isLU || isCHOL) { 3959 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3960 } else if (sub_schurs && sub_schurs->reuse_solver) { 3961 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3962 MatFactorType type; 3963 3964 F = reuse_solver->F; 3965 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3966 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3967 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3968 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3969 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3970 } else F = NULL; 3971 3972 /* determine if we can use a sparse right-hand side */ 3973 sparserhs = PETSC_FALSE; 3974 if (F) { 3975 MatSolverType solver; 3976 3977 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3978 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3979 } 3980 3981 /* allocate workspace */ 3982 n = 0; 3983 if (n_constraints) { 3984 n += lda_rhs*n_constraints; 3985 } 3986 if (n_vertices) { 3987 n = PetscMax(2*lda_rhs*n_vertices,n); 3988 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3989 } 3990 if (!pcbddc->symmetric_primal) { 3991 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3992 } 3993 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3994 3995 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3996 dummy_vec = NULL; 3997 if (need_benign_correction && lda_rhs != n_R && F) { 3998 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);CHKERRQ(ierr); 3999 ierr = VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);CHKERRQ(ierr); 4000 ierr = VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);CHKERRQ(ierr); 4001 } 4002 4003 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 4004 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 4005 4006 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 4007 if (n_constraints) { 4008 Mat M3,C_B; 4009 IS is_aux; 4010 PetscScalar *array,*array2; 4011 4012 /* Extract constraints on R nodes: C_{CR} */ 4013 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 4014 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 4015 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4016 4017 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4018 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4019 if (!sparserhs) { 4020 ierr = PetscArrayzero(work,lda_rhs*n_constraints);CHKERRQ(ierr); 4021 for (i=0;i<n_constraints;i++) { 4022 const PetscScalar *row_cmat_values; 4023 const PetscInt *row_cmat_indices; 4024 PetscInt size_of_constraint,j; 4025 4026 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 4027 for (j=0;j<size_of_constraint;j++) { 4028 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 4029 } 4030 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 4031 } 4032 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 4033 } else { 4034 Mat tC_CR; 4035 4036 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 4037 if (lda_rhs != n_R) { 4038 PetscScalar *aa; 4039 PetscInt r,*ii,*jj; 4040 PetscBool done; 4041 4042 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4043 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4044 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 4045 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 4046 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4047 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4048 } else { 4049 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 4050 tC_CR = C_CR; 4051 } 4052 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 4053 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 4054 } 4055 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 4056 if (F) { 4057 if (need_benign_correction) { 4058 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4059 4060 /* rhs is already zero on interior dofs, no need to change the rhs */ 4061 ierr = PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n);CHKERRQ(ierr); 4062 } 4063 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 4064 if (need_benign_correction) { 4065 PetscScalar *marr; 4066 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4067 4068 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4069 if (lda_rhs != n_R) { 4070 for (i=0;i<n_constraints;i++) { 4071 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4072 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4073 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4074 } 4075 } else { 4076 for (i=0;i<n_constraints;i++) { 4077 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4078 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4079 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4080 } 4081 } 4082 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4083 } 4084 } else { 4085 PetscScalar *marr; 4086 4087 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4088 for (i=0;i<n_constraints;i++) { 4089 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4090 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 4091 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4092 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4093 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4094 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4095 } 4096 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4097 } 4098 if (sparserhs) { 4099 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 4100 } 4101 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4102 if (!pcbddc->switch_static) { 4103 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4104 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 4105 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 4106 for (i=0;i<n_constraints;i++) { 4107 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 4108 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 4109 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4110 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4111 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4112 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4113 } 4114 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 4115 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 4116 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4117 } else { 4118 if (lda_rhs != n_R) { 4119 IS dummy; 4120 4121 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 4122 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4123 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 4124 } else { 4125 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 4126 pcbddc->local_auxmat2 = local_auxmat2_R; 4127 } 4128 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4129 } 4130 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4131 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 4132 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 4133 if (isCHOL) { 4134 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 4135 } else { 4136 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 4137 } 4138 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 4139 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4140 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4141 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4142 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4143 ierr = MatDestroy(&M3);CHKERRQ(ierr); 4144 } 4145 4146 /* Get submatrices from subdomain matrix */ 4147 if (n_vertices) { 4148 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4149 PetscBool oldpin; 4150 #endif 4151 PetscBool isaij; 4152 IS is_aux; 4153 4154 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4155 IS tis; 4156 4157 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 4158 ierr = ISSort(tis);CHKERRQ(ierr); 4159 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 4160 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4161 } else { 4162 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 4163 } 4164 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4165 oldpin = pcbddc->local_mat->boundtocpu; 4166 #endif 4167 ierr = MatBindToCPU(pcbddc->local_mat,PETSC_TRUE);CHKERRQ(ierr); 4168 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 4169 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 4170 ierr = PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij);CHKERRQ(ierr); 4171 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4172 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4173 } 4174 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 4175 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4176 ierr = MatBindToCPU(pcbddc->local_mat,oldpin);CHKERRQ(ierr); 4177 #endif 4178 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4179 } 4180 4181 /* Matrix of coarse basis functions (local) */ 4182 if (pcbddc->coarse_phi_B) { 4183 PetscInt on_B,on_primal,on_D=n_D; 4184 if (pcbddc->coarse_phi_D) { 4185 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 4186 } 4187 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 4188 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4189 PetscScalar *marray; 4190 4191 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 4192 ierr = PetscFree(marray);CHKERRQ(ierr); 4193 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4194 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4195 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4196 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4197 } 4198 } 4199 4200 if (!pcbddc->coarse_phi_B) { 4201 PetscScalar *marr; 4202 4203 /* memory size */ 4204 n = n_B*pcbddc->local_primal_size; 4205 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4206 if (!pcbddc->symmetric_primal) n *= 2; 4207 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 4208 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4209 marr += n_B*pcbddc->local_primal_size; 4210 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4211 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4212 marr += n_D*pcbddc->local_primal_size; 4213 } 4214 if (!pcbddc->symmetric_primal) { 4215 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4216 marr += n_B*pcbddc->local_primal_size; 4217 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4218 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4219 } 4220 } else { 4221 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 4222 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4223 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4224 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 4225 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4226 } 4227 } 4228 } 4229 4230 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4231 p0_lidx_I = NULL; 4232 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4233 const PetscInt *idxs; 4234 4235 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4236 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 4237 for (i=0;i<pcbddc->benign_n;i++) { 4238 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 4239 } 4240 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4241 } 4242 4243 /* vertices */ 4244 if (n_vertices) { 4245 PetscBool restoreavr = PETSC_FALSE; 4246 4247 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 4248 4249 if (n_R) { 4250 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4251 PetscBLASInt B_N,B_one = 1; 4252 const PetscScalar *x; 4253 PetscScalar *y; 4254 4255 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 4256 if (need_benign_correction) { 4257 ISLocalToGlobalMapping RtoN; 4258 IS is_p0; 4259 PetscInt *idxs_p0,n; 4260 4261 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 4262 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 4263 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 4264 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n); 4265 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 4266 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 4267 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 4268 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 4269 } 4270 4271 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4272 if (!sparserhs || need_benign_correction) { 4273 if (lda_rhs == n_R) { 4274 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4275 } else { 4276 PetscScalar *av,*array; 4277 const PetscInt *xadj,*adjncy; 4278 PetscInt n; 4279 PetscBool flg_row; 4280 4281 array = work+lda_rhs*n_vertices; 4282 ierr = PetscArrayzero(array,lda_rhs*n_vertices);CHKERRQ(ierr); 4283 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4284 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4285 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 4286 for (i=0;i<n;i++) { 4287 PetscInt j; 4288 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4289 } 4290 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4291 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4292 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 4293 } 4294 if (need_benign_correction) { 4295 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4296 PetscScalar *marr; 4297 4298 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 4299 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4300 4301 | 0 0 0 | (V) 4302 L = | 0 0 -1 | (P-p0) 4303 | 0 0 -1 | (p0) 4304 4305 */ 4306 for (i=0;i<reuse_solver->benign_n;i++) { 4307 const PetscScalar *vals; 4308 const PetscInt *idxs,*idxs_zero; 4309 PetscInt n,j,nz; 4310 4311 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4312 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4313 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4314 for (j=0;j<n;j++) { 4315 PetscScalar val = vals[j]; 4316 PetscInt k,col = idxs[j]; 4317 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4318 } 4319 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4320 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4321 } 4322 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 4323 } 4324 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 4325 Brhs = A_RV; 4326 } else { 4327 Mat tA_RVT,A_RVT; 4328 4329 if (!pcbddc->symmetric_primal) { 4330 /* A_RV already scaled by -1 */ 4331 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 4332 } else { 4333 restoreavr = PETSC_TRUE; 4334 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4335 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 4336 A_RVT = A_VR; 4337 } 4338 if (lda_rhs != n_R) { 4339 PetscScalar *aa; 4340 PetscInt r,*ii,*jj; 4341 PetscBool done; 4342 4343 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4344 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4345 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4346 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4347 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4348 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4349 } else { 4350 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4351 tA_RVT = A_RVT; 4352 } 4353 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4354 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4355 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4356 } 4357 if (F) { 4358 /* need to correct the rhs */ 4359 if (need_benign_correction) { 4360 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4361 PetscScalar *marr; 4362 4363 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4364 if (lda_rhs != n_R) { 4365 for (i=0;i<n_vertices;i++) { 4366 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4367 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4368 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4369 } 4370 } else { 4371 for (i=0;i<n_vertices;i++) { 4372 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4373 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4374 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4375 } 4376 } 4377 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4378 } 4379 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4380 if (restoreavr) { 4381 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4382 } 4383 /* need to correct the solution */ 4384 if (need_benign_correction) { 4385 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4386 PetscScalar *marr; 4387 4388 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4389 if (lda_rhs != n_R) { 4390 for (i=0;i<n_vertices;i++) { 4391 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4392 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4393 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4394 } 4395 } else { 4396 for (i=0;i<n_vertices;i++) { 4397 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4398 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4399 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4400 } 4401 } 4402 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4403 } 4404 } else { 4405 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4406 for (i=0;i<n_vertices;i++) { 4407 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4408 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4409 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4410 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4411 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4412 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4413 } 4414 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4415 } 4416 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4417 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4418 /* S_VV and S_CV */ 4419 if (n_constraints) { 4420 Mat B; 4421 4422 ierr = PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices);CHKERRQ(ierr); 4423 for (i=0;i<n_vertices;i++) { 4424 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4425 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4426 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4427 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4428 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4429 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4430 } 4431 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4432 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4433 ierr = MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV);CHKERRQ(ierr); 4434 ierr = MatProductSetType(S_CV,MATPRODUCT_AB);CHKERRQ(ierr); 4435 ierr = MatProductSetFromOptions(S_CV);CHKERRQ(ierr); 4436 ierr = MatProductNumeric(S_CV);CHKERRQ(ierr); 4437 ierr = MatProductClear(S_CV);CHKERRQ(ierr); 4438 4439 ierr = MatDestroy(&B);CHKERRQ(ierr); 4440 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4441 /* Reuse B = local_auxmat2_R * S_CV */ 4442 ierr = MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B);CHKERRQ(ierr); 4443 ierr = MatProductSetType(B,MATPRODUCT_AB);CHKERRQ(ierr); 4444 ierr = MatProductSetFromOptions(B);CHKERRQ(ierr); 4445 ierr = MatProductNumeric(B);CHKERRQ(ierr); 4446 4447 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4448 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4449 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4450 ierr = MatDestroy(&B);CHKERRQ(ierr); 4451 } 4452 if (lda_rhs != n_R) { 4453 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4454 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4455 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4456 } 4457 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4458 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4459 if (need_benign_correction) { 4460 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4461 PetscScalar *marr,*sums; 4462 4463 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4464 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4465 for (i=0;i<reuse_solver->benign_n;i++) { 4466 const PetscScalar *vals; 4467 const PetscInt *idxs,*idxs_zero; 4468 PetscInt n,j,nz; 4469 4470 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4471 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4472 for (j=0;j<n_vertices;j++) { 4473 PetscInt k; 4474 sums[j] = 0.; 4475 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4476 } 4477 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4478 for (j=0;j<n;j++) { 4479 PetscScalar val = vals[j]; 4480 PetscInt k; 4481 for (k=0;k<n_vertices;k++) { 4482 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4483 } 4484 } 4485 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4486 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4487 } 4488 ierr = PetscFree(sums);CHKERRQ(ierr); 4489 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4490 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4491 } 4492 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4493 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4494 ierr = MatDenseGetArrayRead(A_VV,&x);CHKERRQ(ierr); 4495 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4496 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4497 ierr = MatDenseRestoreArrayRead(A_VV,&x);CHKERRQ(ierr); 4498 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4499 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4500 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4501 } else { 4502 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4503 } 4504 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4505 4506 /* coarse basis functions */ 4507 for (i=0;i<n_vertices;i++) { 4508 PetscScalar *y; 4509 4510 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4511 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4512 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4513 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4514 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4515 y[n_B*i+idx_V_B[i]] = 1.0; 4516 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4517 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4518 4519 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4520 PetscInt j; 4521 4522 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4523 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4524 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4525 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4526 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4527 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4528 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4529 } 4530 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4531 } 4532 /* if n_R == 0 the object is not destroyed */ 4533 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4534 } 4535 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4536 4537 if (n_constraints) { 4538 Mat B; 4539 4540 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4541 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4542 ierr = MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B);CHKERRQ(ierr); 4543 ierr = MatProductSetType(B,MATPRODUCT_AB);CHKERRQ(ierr); 4544 ierr = MatProductSetFromOptions(B);CHKERRQ(ierr); 4545 ierr = MatProductNumeric(B);CHKERRQ(ierr); 4546 4547 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4548 if (n_vertices) { 4549 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4550 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4551 } else { 4552 Mat S_VCt; 4553 4554 if (lda_rhs != n_R) { 4555 ierr = MatDestroy(&B);CHKERRQ(ierr); 4556 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4557 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4558 } 4559 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4560 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4561 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4562 } 4563 } 4564 ierr = MatDestroy(&B);CHKERRQ(ierr); 4565 /* coarse basis functions */ 4566 for (i=0;i<n_constraints;i++) { 4567 PetscScalar *y; 4568 4569 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4570 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4571 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4572 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4573 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4574 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4575 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4576 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4577 PetscInt j; 4578 4579 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4580 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4581 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4582 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4583 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4584 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4585 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4586 } 4587 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4588 } 4589 } 4590 if (n_constraints) { 4591 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4592 } 4593 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4594 4595 /* coarse matrix entries relative to B_0 */ 4596 if (pcbddc->benign_n) { 4597 Mat B0_B,B0_BPHI; 4598 IS is_dummy; 4599 const PetscScalar *data; 4600 PetscInt j; 4601 4602 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4603 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4604 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4605 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4606 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4607 ierr = MatDenseGetArrayRead(B0_BPHI,&data);CHKERRQ(ierr); 4608 for (j=0;j<pcbddc->benign_n;j++) { 4609 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4610 for (i=0;i<pcbddc->local_primal_size;i++) { 4611 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4612 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4613 } 4614 } 4615 ierr = MatDenseRestoreArrayRead(B0_BPHI,&data);CHKERRQ(ierr); 4616 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4617 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4618 } 4619 4620 /* compute other basis functions for non-symmetric problems */ 4621 if (!pcbddc->symmetric_primal) { 4622 Mat B_V=NULL,B_C=NULL; 4623 PetscScalar *marray; 4624 4625 if (n_constraints) { 4626 Mat S_CCT,C_CRT; 4627 4628 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4629 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4630 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4631 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4632 if (n_vertices) { 4633 Mat S_VCT; 4634 4635 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4636 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4637 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4638 } 4639 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4640 } else { 4641 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4642 } 4643 if (n_vertices && n_R) { 4644 PetscScalar *av,*marray; 4645 const PetscInt *xadj,*adjncy; 4646 PetscInt n; 4647 PetscBool flg_row; 4648 4649 /* B_V = B_V - A_VR^T */ 4650 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4651 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4652 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4653 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4654 for (i=0;i<n;i++) { 4655 PetscInt j; 4656 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4657 } 4658 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4659 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4660 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4661 } 4662 4663 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4664 if (n_vertices) { 4665 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4666 for (i=0;i<n_vertices;i++) { 4667 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4668 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4669 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4670 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4671 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4672 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4673 } 4674 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4675 } 4676 if (B_C) { 4677 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4678 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4679 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4680 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4681 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4682 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4683 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4684 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4685 } 4686 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4687 } 4688 /* coarse basis functions */ 4689 for (i=0;i<pcbddc->local_primal_size;i++) { 4690 PetscScalar *y; 4691 4692 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4693 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4694 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4695 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4696 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4697 if (i<n_vertices) { 4698 y[n_B*i+idx_V_B[i]] = 1.0; 4699 } 4700 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4701 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4702 4703 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4704 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4705 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4706 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4707 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4708 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4709 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4710 } 4711 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4712 } 4713 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4714 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4715 } 4716 4717 /* free memory */ 4718 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4719 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4720 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4721 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4722 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4723 ierr = PetscFree(work);CHKERRQ(ierr); 4724 if (n_vertices) { 4725 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4726 } 4727 if (n_constraints) { 4728 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4729 } 4730 ierr = PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 4731 4732 /* Checking coarse_sub_mat and coarse basis functios */ 4733 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4734 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4735 if (pcbddc->dbg_flag) { 4736 Mat coarse_sub_mat; 4737 Mat AUXMAT,TM1,TM2,TM3,TM4; 4738 Mat coarse_phi_D,coarse_phi_B; 4739 Mat coarse_psi_D,coarse_psi_B; 4740 Mat A_II,A_BB,A_IB,A_BI; 4741 Mat C_B,CPHI; 4742 IS is_dummy; 4743 Vec mones; 4744 MatType checkmattype=MATSEQAIJ; 4745 PetscReal real_value; 4746 4747 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4748 Mat A; 4749 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4750 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4751 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4752 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4753 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4754 ierr = MatDestroy(&A);CHKERRQ(ierr); 4755 } else { 4756 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4757 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4758 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4759 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4760 } 4761 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4762 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4763 if (!pcbddc->symmetric_primal) { 4764 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4765 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4766 } 4767 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4768 4769 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4770 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4771 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4772 if (!pcbddc->symmetric_primal) { 4773 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4774 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4775 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4776 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4777 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4778 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4779 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4780 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4781 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4782 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4783 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4784 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4785 } else { 4786 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4787 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4788 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4789 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4790 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4791 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4792 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4793 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4794 } 4795 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4796 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4797 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4798 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4799 if (pcbddc->benign_n) { 4800 Mat B0_B,B0_BPHI; 4801 const PetscScalar *data2; 4802 PetscScalar *data; 4803 PetscInt j; 4804 4805 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4806 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4807 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4808 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4809 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4810 ierr = MatDenseGetArrayRead(B0_BPHI,&data2);CHKERRQ(ierr); 4811 for (j=0;j<pcbddc->benign_n;j++) { 4812 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4813 for (i=0;i<pcbddc->local_primal_size;i++) { 4814 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4815 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4816 } 4817 } 4818 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4819 ierr = MatDenseRestoreArrayRead(B0_BPHI,&data2);CHKERRQ(ierr); 4820 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4821 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4822 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4823 } 4824 #if 0 4825 { 4826 PetscViewer viewer; 4827 char filename[256]; 4828 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4829 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4830 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4831 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4832 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4833 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4834 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4835 if (pcbddc->coarse_phi_B) { 4836 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4837 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4838 } 4839 if (pcbddc->coarse_phi_D) { 4840 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4841 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4842 } 4843 if (pcbddc->coarse_psi_B) { 4844 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4845 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4846 } 4847 if (pcbddc->coarse_psi_D) { 4848 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4849 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4850 } 4851 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4852 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4853 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4854 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4855 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4856 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4857 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4858 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4859 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4860 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4861 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4862 } 4863 #endif 4864 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4865 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4866 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4867 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4868 4869 /* check constraints */ 4870 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4871 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4872 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4873 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4874 } else { 4875 PetscScalar *data; 4876 Mat tmat; 4877 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4878 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4879 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4880 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4881 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4882 } 4883 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4884 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4885 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4886 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4887 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4888 if (!pcbddc->symmetric_primal) { 4889 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4890 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4891 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4892 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4893 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4894 } 4895 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4896 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4897 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4898 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4899 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4900 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4901 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4902 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4903 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4904 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4905 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4906 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4907 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4908 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4909 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4910 if (!pcbddc->symmetric_primal) { 4911 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4912 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4913 } 4914 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4915 } 4916 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4917 { 4918 PetscBool gpu; 4919 4920 ierr = PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu);CHKERRQ(ierr); 4921 if (gpu) { 4922 if (pcbddc->local_auxmat1) { 4923 ierr = MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4924 } 4925 if (pcbddc->local_auxmat2) { 4926 ierr = MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4927 } 4928 if (pcbddc->coarse_phi_B) { 4929 ierr = MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4930 } 4931 if (pcbddc->coarse_phi_D) { 4932 ierr = MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4933 } 4934 if (pcbddc->coarse_psi_B) { 4935 ierr = MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4936 } 4937 if (pcbddc->coarse_psi_D) { 4938 ierr = MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4939 } 4940 } 4941 } 4942 /* get back data */ 4943 *coarse_submat_vals_n = coarse_submat_vals; 4944 PetscFunctionReturn(0); 4945 } 4946 4947 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4948 { 4949 Mat *work_mat; 4950 IS isrow_s,iscol_s; 4951 PetscBool rsorted,csorted; 4952 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4953 PetscErrorCode ierr; 4954 4955 PetscFunctionBegin; 4956 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4957 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4958 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4959 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4960 4961 if (!rsorted) { 4962 const PetscInt *idxs; 4963 PetscInt *idxs_sorted,i; 4964 4965 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4966 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4967 for (i=0;i<rsize;i++) { 4968 idxs_perm_r[i] = i; 4969 } 4970 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4971 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4972 for (i=0;i<rsize;i++) { 4973 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4974 } 4975 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4976 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4977 } else { 4978 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4979 isrow_s = isrow; 4980 } 4981 4982 if (!csorted) { 4983 if (isrow == iscol) { 4984 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4985 iscol_s = isrow_s; 4986 } else { 4987 const PetscInt *idxs; 4988 PetscInt *idxs_sorted,i; 4989 4990 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4991 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4992 for (i=0;i<csize;i++) { 4993 idxs_perm_c[i] = i; 4994 } 4995 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4996 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4997 for (i=0;i<csize;i++) { 4998 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4999 } 5000 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 5001 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 5002 } 5003 } else { 5004 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 5005 iscol_s = iscol; 5006 } 5007 5008 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5009 5010 if (!rsorted || !csorted) { 5011 Mat new_mat; 5012 IS is_perm_r,is_perm_c; 5013 5014 if (!rsorted) { 5015 PetscInt *idxs_r,i; 5016 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 5017 for (i=0;i<rsize;i++) { 5018 idxs_r[idxs_perm_r[i]] = i; 5019 } 5020 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 5021 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 5022 } else { 5023 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 5024 } 5025 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 5026 5027 if (!csorted) { 5028 if (isrow_s == iscol_s) { 5029 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 5030 is_perm_c = is_perm_r; 5031 } else { 5032 PetscInt *idxs_c,i; 5033 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 5034 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 5035 for (i=0;i<csize;i++) { 5036 idxs_c[idxs_perm_c[i]] = i; 5037 } 5038 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 5039 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 5040 } 5041 } else { 5042 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 5043 } 5044 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 5045 5046 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 5047 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 5048 work_mat[0] = new_mat; 5049 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 5050 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 5051 } 5052 5053 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 5054 *B = work_mat[0]; 5055 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 5056 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 5057 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 5058 PetscFunctionReturn(0); 5059 } 5060 5061 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5062 { 5063 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5064 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5065 Mat new_mat,lA; 5066 IS is_local,is_global; 5067 PetscInt local_size; 5068 PetscBool isseqaij; 5069 PetscErrorCode ierr; 5070 5071 PetscFunctionBegin; 5072 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5073 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 5074 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 5075 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 5076 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 5077 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 5078 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5079 5080 if (pcbddc->dbg_flag) { 5081 Vec x,x_change; 5082 PetscReal error; 5083 5084 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 5085 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5086 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 5087 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5088 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5089 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 5090 if (!pcbddc->change_interior) { 5091 const PetscScalar *x,*y,*v; 5092 PetscReal lerror = 0.; 5093 PetscInt i; 5094 5095 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 5096 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 5097 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 5098 for (i=0;i<local_size;i++) 5099 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 5100 lerror = PetscAbsScalar(x[i]-y[i]); 5101 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 5102 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 5103 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 5104 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5105 if (error > PETSC_SMALL) { 5106 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5107 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error); 5108 } else { 5109 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error); 5110 } 5111 } 5112 } 5113 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5114 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5115 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5116 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5117 if (error > PETSC_SMALL) { 5118 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5119 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 5120 } else { 5121 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error); 5122 } 5123 } 5124 ierr = VecDestroy(&x);CHKERRQ(ierr); 5125 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5126 } 5127 5128 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5129 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 5130 5131 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5132 ierr = PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5133 if (isseqaij) { 5134 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5135 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 5136 if (lA) { 5137 Mat work; 5138 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 5139 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 5140 ierr = MatDestroy(&work);CHKERRQ(ierr); 5141 } 5142 } else { 5143 Mat work_mat; 5144 5145 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5146 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5147 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 5148 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 5149 if (lA) { 5150 Mat work; 5151 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5152 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 5153 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 5154 ierr = MatDestroy(&work);CHKERRQ(ierr); 5155 } 5156 } 5157 if (matis->A->symmetric_set) { 5158 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 5159 #if !defined(PETSC_USE_COMPLEX) 5160 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 5161 #endif 5162 } 5163 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 5164 PetscFunctionReturn(0); 5165 } 5166 5167 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5168 { 5169 PC_IS* pcis = (PC_IS*)(pc->data); 5170 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5171 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5172 PetscInt *idx_R_local=NULL; 5173 PetscInt n_vertices,i,j,n_R,n_D,n_B; 5174 PetscInt vbs,bs; 5175 PetscBT bitmask=NULL; 5176 PetscErrorCode ierr; 5177 5178 PetscFunctionBegin; 5179 /* 5180 No need to setup local scatters if 5181 - primal space is unchanged 5182 AND 5183 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5184 AND 5185 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5186 */ 5187 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5188 PetscFunctionReturn(0); 5189 } 5190 /* destroy old objects */ 5191 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 5192 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 5193 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 5194 /* Set Non-overlapping dimensions */ 5195 n_B = pcis->n_B; 5196 n_D = pcis->n - n_B; 5197 n_vertices = pcbddc->n_vertices; 5198 5199 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5200 5201 /* create auxiliary bitmask and allocate workspace */ 5202 if (!sub_schurs || !sub_schurs->reuse_solver) { 5203 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 5204 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 5205 for (i=0;i<n_vertices;i++) { 5206 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 5207 } 5208 5209 for (i=0, n_R=0; i<pcis->n; i++) { 5210 if (!PetscBTLookup(bitmask,i)) { 5211 idx_R_local[n_R++] = i; 5212 } 5213 } 5214 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5215 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5216 5217 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5218 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 5219 } 5220 5221 /* Block code */ 5222 vbs = 1; 5223 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 5224 if (bs>1 && !(n_vertices%bs)) { 5225 PetscBool is_blocked = PETSC_TRUE; 5226 PetscInt *vary; 5227 if (!sub_schurs || !sub_schurs->reuse_solver) { 5228 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 5229 ierr = PetscArrayzero(vary,pcis->n/bs);CHKERRQ(ierr); 5230 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5231 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5232 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5233 for (i=0; i<pcis->n/bs; i++) { 5234 if (vary[i]!=0 && vary[i]!=bs) { 5235 is_blocked = PETSC_FALSE; 5236 break; 5237 } 5238 } 5239 ierr = PetscFree(vary);CHKERRQ(ierr); 5240 } else { 5241 /* Verify directly the R set */ 5242 for (i=0; i<n_R/bs; i++) { 5243 PetscInt j,node=idx_R_local[bs*i]; 5244 for (j=1; j<bs; j++) { 5245 if (node != idx_R_local[bs*i+j]-j) { 5246 is_blocked = PETSC_FALSE; 5247 break; 5248 } 5249 } 5250 } 5251 } 5252 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5253 vbs = bs; 5254 for (i=0;i<n_R/vbs;i++) { 5255 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5256 } 5257 } 5258 } 5259 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 5260 if (sub_schurs && sub_schurs->reuse_solver) { 5261 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5262 5263 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5264 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 5265 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 5266 reuse_solver->is_R = pcbddc->is_R_local; 5267 } else { 5268 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 5269 } 5270 5271 /* print some info if requested */ 5272 if (pcbddc->dbg_flag) { 5273 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5274 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5275 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5276 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 5277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 5278 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 5279 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5280 } 5281 5282 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5283 if (!sub_schurs || !sub_schurs->reuse_solver) { 5284 IS is_aux1,is_aux2; 5285 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5286 5287 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5288 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 5289 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 5290 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5291 for (i=0; i<n_D; i++) { 5292 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 5293 } 5294 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5295 for (i=0, j=0; i<n_R; i++) { 5296 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5297 aux_array1[j++] = i; 5298 } 5299 } 5300 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5301 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5302 for (i=0, j=0; i<n_B; i++) { 5303 if (!PetscBTLookup(bitmask,is_indices[i])) { 5304 aux_array2[j++] = i; 5305 } 5306 } 5307 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5308 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 5309 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 5310 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5311 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 5312 5313 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5314 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 5315 for (i=0, j=0; i<n_R; i++) { 5316 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5317 aux_array1[j++] = i; 5318 } 5319 } 5320 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5321 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5322 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5323 } 5324 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 5325 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5326 } else { 5327 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5328 IS tis; 5329 PetscInt schur_size; 5330 5331 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 5332 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 5333 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 5334 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5335 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5336 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 5337 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5338 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5339 } 5340 } 5341 PetscFunctionReturn(0); 5342 } 5343 5344 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5345 { 5346 MatNullSpace NullSpace; 5347 Mat dmat; 5348 const Vec *nullvecs; 5349 Vec v,v2,*nullvecs2; 5350 VecScatter sct = NULL; 5351 PetscContainer c; 5352 PetscScalar *ddata; 5353 PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs; 5354 PetscBool nnsp_has_cnst; 5355 PetscErrorCode ierr; 5356 5357 PetscFunctionBegin; 5358 if (!is && !B) { /* MATIS */ 5359 Mat_IS* matis = (Mat_IS*)A->data; 5360 5361 if (!B) { 5362 ierr = MatISGetLocalMat(A,&B);CHKERRQ(ierr); 5363 } 5364 sct = matis->cctx; 5365 ierr = PetscObjectReference((PetscObject)sct);CHKERRQ(ierr); 5366 } else { 5367 ierr = MatGetNullSpace(B,&NullSpace);CHKERRQ(ierr); 5368 if (!NullSpace) { 5369 ierr = MatGetNearNullSpace(B,&NullSpace);CHKERRQ(ierr); 5370 } 5371 if (NullSpace) PetscFunctionReturn(0); 5372 } 5373 ierr = MatGetNullSpace(A,&NullSpace);CHKERRQ(ierr); 5374 if (!NullSpace) { 5375 ierr = MatGetNearNullSpace(A,&NullSpace);CHKERRQ(ierr); 5376 } 5377 if (!NullSpace) PetscFunctionReturn(0); 5378 5379 ierr = MatCreateVecs(A,&v,NULL);CHKERRQ(ierr); 5380 ierr = MatCreateVecs(B,&v2,NULL);CHKERRQ(ierr); 5381 if (!sct) { 5382 ierr = VecScatterCreate(v,is,v2,NULL,&sct);CHKERRQ(ierr); 5383 } 5384 ierr = MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs);CHKERRQ(ierr); 5385 bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst; 5386 ierr = PetscMalloc1(bsiz,&nullvecs2);CHKERRQ(ierr); 5387 ierr = VecGetBlockSize(v2,&bs);CHKERRQ(ierr); 5388 ierr = VecGetSize(v2,&N);CHKERRQ(ierr); 5389 ierr = VecGetLocalSize(v2,&n);CHKERRQ(ierr); 5390 ierr = PetscMalloc1(n*bsiz,&ddata);CHKERRQ(ierr); 5391 for (k=0;k<nnsp_size;k++) { 5392 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k]);CHKERRQ(ierr); 5393 ierr = VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5394 ierr = VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5395 } 5396 if (nnsp_has_cnst) { 5397 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size]);CHKERRQ(ierr); 5398 ierr = VecSet(nullvecs2[nnsp_size],1.0);CHKERRQ(ierr); 5399 } 5400 ierr = PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2);CHKERRQ(ierr); 5401 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace);CHKERRQ(ierr); 5402 5403 ierr = MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat);CHKERRQ(ierr); 5404 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)B),&c);CHKERRQ(ierr); 5405 ierr = PetscContainerSetPointer(c,ddata);CHKERRQ(ierr); 5406 ierr = PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault);CHKERRQ(ierr); 5407 ierr = PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c);CHKERRQ(ierr); 5408 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 5409 ierr = PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat);CHKERRQ(ierr); 5410 ierr = MatDestroy(&dmat);CHKERRQ(ierr); 5411 5412 for (k=0;k<bsiz;k++) { 5413 ierr = VecDestroy(&nullvecs2[k]);CHKERRQ(ierr); 5414 } 5415 ierr = PetscFree(nullvecs2);CHKERRQ(ierr); 5416 ierr = MatSetNearNullSpace(B,NullSpace);CHKERRQ(ierr); 5417 ierr = MatNullSpaceDestroy(&NullSpace);CHKERRQ(ierr); 5418 ierr = VecDestroy(&v);CHKERRQ(ierr); 5419 ierr = VecDestroy(&v2);CHKERRQ(ierr); 5420 ierr = VecScatterDestroy(&sct);CHKERRQ(ierr); 5421 PetscFunctionReturn(0); 5422 } 5423 5424 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5425 { 5426 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5427 PC_IS *pcis = (PC_IS*)pc->data; 5428 PC pc_temp; 5429 Mat A_RR; 5430 MatNullSpace nnsp; 5431 MatReuse reuse; 5432 PetscScalar m_one = -1.0; 5433 PetscReal value; 5434 PetscInt n_D,n_R; 5435 PetscBool issbaij,opts; 5436 PetscErrorCode ierr; 5437 void (*f)(void) = 0; 5438 char dir_prefix[256],neu_prefix[256],str_level[16]; 5439 size_t len; 5440 5441 PetscFunctionBegin; 5442 ierr = PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5443 /* approximate solver, propagate NearNullSpace if needed */ 5444 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5445 MatNullSpace gnnsp1,gnnsp2; 5446 PetscBool lhas,ghas; 5447 5448 ierr = MatGetNearNullSpace(pcbddc->local_mat,&nnsp);CHKERRQ(ierr); 5449 ierr = MatGetNearNullSpace(pc->pmat,&gnnsp1);CHKERRQ(ierr); 5450 ierr = MatGetNullSpace(pc->pmat,&gnnsp2);CHKERRQ(ierr); 5451 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5452 ierr = MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5453 if (!ghas && (gnnsp1 || gnnsp2)) { 5454 ierr = MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL);CHKERRQ(ierr); 5455 } 5456 } 5457 5458 /* compute prefixes */ 5459 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 5460 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 5461 if (!pcbddc->current_level) { 5462 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));CHKERRQ(ierr); 5463 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));CHKERRQ(ierr); 5464 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5465 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5466 } else { 5467 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 5468 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5469 len -= 15; /* remove "pc_bddc_coarse_" */ 5470 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5471 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5472 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5473 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5474 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5475 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5476 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5477 ierr = PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));CHKERRQ(ierr); 5478 ierr = PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));CHKERRQ(ierr); 5479 } 5480 5481 /* DIRICHLET PROBLEM */ 5482 if (dirichlet) { 5483 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5484 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5485 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 5486 if (pcbddc->dbg_flag) { 5487 Mat A_IIn; 5488 5489 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5490 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5491 pcis->A_II = A_IIn; 5492 } 5493 } 5494 if (pcbddc->local_mat->symmetric_set) { 5495 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);CHKERRQ(ierr); 5496 } 5497 /* Matrix for Dirichlet problem is pcis->A_II */ 5498 n_D = pcis->n - pcis->n_B; 5499 opts = PETSC_FALSE; 5500 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5501 opts = PETSC_TRUE; 5502 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5503 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5504 /* default */ 5505 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5506 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5507 ierr = PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5508 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5509 if (issbaij) { 5510 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5511 } else { 5512 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5513 } 5514 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);CHKERRQ(ierr); 5515 } 5516 ierr = MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix);CHKERRQ(ierr); 5517 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II);CHKERRQ(ierr); 5518 /* Allow user's customization */ 5519 if (opts) { 5520 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5521 } 5522 ierr = MatGetNearNullSpace(pcis->pA_II,&nnsp);CHKERRQ(ierr); 5523 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5524 ierr = MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II);CHKERRQ(ierr); 5525 } 5526 ierr = MatGetNearNullSpace(pcis->pA_II,&nnsp);CHKERRQ(ierr); 5527 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5528 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5529 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5530 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5531 const PetscInt *idxs; 5532 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5533 5534 ierr = ISGetLocalSize(pcis->is_I_local,&nl);CHKERRQ(ierr); 5535 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5536 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5537 for (i=0;i<nl;i++) { 5538 for (d=0;d<cdim;d++) { 5539 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5540 } 5541 } 5542 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5543 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5544 ierr = PetscFree(scoords);CHKERRQ(ierr); 5545 } 5546 if (sub_schurs && sub_schurs->reuse_solver) { 5547 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5548 5549 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5550 } 5551 5552 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5553 if (!n_D) { 5554 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5555 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5556 } 5557 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 5558 /* set ksp_D into pcis data */ 5559 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5560 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5561 pcis->ksp_D = pcbddc->ksp_D; 5562 } 5563 5564 /* NEUMANN PROBLEM */ 5565 A_RR = 0; 5566 if (neumann) { 5567 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5568 PetscInt ibs,mbs; 5569 PetscBool issbaij, reuse_neumann_solver; 5570 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5571 5572 reuse_neumann_solver = PETSC_FALSE; 5573 if (sub_schurs && sub_schurs->reuse_solver) { 5574 IS iP; 5575 5576 reuse_neumann_solver = PETSC_TRUE; 5577 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5578 if (iP) reuse_neumann_solver = PETSC_FALSE; 5579 } 5580 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5581 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5582 if (pcbddc->ksp_R) { /* already created ksp */ 5583 PetscInt nn_R; 5584 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5585 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5586 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5587 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5588 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5589 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5590 reuse = MAT_INITIAL_MATRIX; 5591 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5592 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5593 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5594 reuse = MAT_INITIAL_MATRIX; 5595 } else { /* safe to reuse the matrix */ 5596 reuse = MAT_REUSE_MATRIX; 5597 } 5598 } 5599 /* last check */ 5600 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5601 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5602 reuse = MAT_INITIAL_MATRIX; 5603 } 5604 } else { /* first time, so we need to create the matrix */ 5605 reuse = MAT_INITIAL_MATRIX; 5606 } 5607 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5608 TODO: Get Rid of these conversions */ 5609 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5610 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5611 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5612 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5613 if (matis->A == pcbddc->local_mat) { 5614 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5615 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5616 } else { 5617 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5618 } 5619 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5620 if (matis->A == pcbddc->local_mat) { 5621 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5622 ierr = MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5623 } else { 5624 ierr = MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5625 } 5626 } 5627 /* extract A_RR */ 5628 if (reuse_neumann_solver) { 5629 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5630 5631 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5632 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5633 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5634 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5635 } else { 5636 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5637 } 5638 } else { 5639 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5640 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5641 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5642 } 5643 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5644 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5645 } 5646 if (pcbddc->local_mat->symmetric_set) { 5647 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);CHKERRQ(ierr); 5648 } 5649 opts = PETSC_FALSE; 5650 if (!pcbddc->ksp_R) { /* create object if not present */ 5651 opts = PETSC_TRUE; 5652 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5653 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5654 /* default */ 5655 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5656 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5657 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5658 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5659 if (issbaij) { 5660 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5661 } else { 5662 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5663 } 5664 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);CHKERRQ(ierr); 5665 } 5666 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5667 ierr = MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);CHKERRQ(ierr); 5668 if (opts) { /* Allow user's customization once */ 5669 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5670 } 5671 ierr = MatGetNearNullSpace(A_RR,&nnsp);CHKERRQ(ierr); 5672 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5673 ierr = MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR);CHKERRQ(ierr); 5674 } 5675 ierr = MatGetNearNullSpace(A_RR,&nnsp);CHKERRQ(ierr); 5676 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5677 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5678 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5679 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5680 const PetscInt *idxs; 5681 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5682 5683 ierr = ISGetLocalSize(pcbddc->is_R_local,&nl);CHKERRQ(ierr); 5684 ierr = ISGetIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5685 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5686 for (i=0;i<nl;i++) { 5687 for (d=0;d<cdim;d++) { 5688 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5689 } 5690 } 5691 ierr = ISRestoreIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5692 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5693 ierr = PetscFree(scoords);CHKERRQ(ierr); 5694 } 5695 5696 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5697 if (!n_R) { 5698 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5699 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5700 } 5701 /* Reuse solver if it is present */ 5702 if (reuse_neumann_solver) { 5703 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5704 5705 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5706 } 5707 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 5708 } 5709 5710 if (pcbddc->dbg_flag) { 5711 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5712 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5713 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5714 } 5715 ierr = PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5716 5717 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5718 if (pcbddc->NullSpace_corr[0]) { 5719 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5720 } 5721 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5722 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5723 } 5724 if (neumann && pcbddc->NullSpace_corr[2]) { 5725 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5726 } 5727 /* check Dirichlet and Neumann solvers */ 5728 if (pcbddc->dbg_flag) { 5729 if (dirichlet) { /* Dirichlet */ 5730 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5731 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5732 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5733 ierr = KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);CHKERRQ(ierr); 5734 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5735 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5736 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5737 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5738 } 5739 if (neumann) { /* Neumann */ 5740 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5741 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5742 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5743 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 5744 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5745 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5746 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5747 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5748 } 5749 } 5750 /* free Neumann problem's matrix */ 5751 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5752 PetscFunctionReturn(0); 5753 } 5754 5755 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5756 { 5757 PetscErrorCode ierr; 5758 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5759 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5760 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5761 5762 PetscFunctionBegin; 5763 if (!reuse_solver) { 5764 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5765 } 5766 if (!pcbddc->switch_static) { 5767 if (applytranspose && pcbddc->local_auxmat1) { 5768 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5769 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5770 } 5771 if (!reuse_solver) { 5772 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5773 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5774 } else { 5775 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5776 5777 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5778 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5779 } 5780 } else { 5781 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5782 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5783 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5784 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5785 if (applytranspose && pcbddc->local_auxmat1) { 5786 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5787 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5788 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5789 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5790 } 5791 } 5792 if (!reuse_solver || pcbddc->switch_static) { 5793 if (applytranspose) { 5794 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5795 } else { 5796 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5797 } 5798 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R);CHKERRQ(ierr); 5799 } else { 5800 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5801 5802 if (applytranspose) { 5803 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5804 } else { 5805 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5806 } 5807 } 5808 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5809 if (!pcbddc->switch_static) { 5810 if (!reuse_solver) { 5811 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5812 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5813 } else { 5814 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5815 5816 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5817 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5818 } 5819 if (!applytranspose && pcbddc->local_auxmat1) { 5820 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5821 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5822 } 5823 } else { 5824 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5825 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5826 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5827 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5828 if (!applytranspose && pcbddc->local_auxmat1) { 5829 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5830 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5831 } 5832 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5833 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5834 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5835 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5836 } 5837 PetscFunctionReturn(0); 5838 } 5839 5840 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5841 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5842 { 5843 PetscErrorCode ierr; 5844 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5845 PC_IS* pcis = (PC_IS*) (pc->data); 5846 const PetscScalar zero = 0.0; 5847 5848 PetscFunctionBegin; 5849 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5850 if (!pcbddc->benign_apply_coarse_only) { 5851 if (applytranspose) { 5852 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5853 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5854 } else { 5855 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5856 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5857 } 5858 } else { 5859 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5860 } 5861 5862 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5863 if (pcbddc->benign_n) { 5864 PetscScalar *array; 5865 PetscInt j; 5866 5867 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5868 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5869 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5870 } 5871 5872 /* start communications from local primal nodes to rhs of coarse solver */ 5873 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5874 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5875 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5876 5877 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5878 if (pcbddc->coarse_ksp) { 5879 Mat coarse_mat; 5880 Vec rhs,sol; 5881 MatNullSpace nullsp; 5882 PetscBool isbddc = PETSC_FALSE; 5883 5884 if (pcbddc->benign_have_null) { 5885 PC coarse_pc; 5886 5887 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5888 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5889 /* we need to propagate to coarser levels the need for a possible benign correction */ 5890 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5891 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5892 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5893 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5894 } 5895 } 5896 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5897 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5898 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5899 if (applytranspose) { 5900 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5901 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5902 ierr = KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);CHKERRQ(ierr); 5903 ierr = MatGetTransposeNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5904 if (nullsp) { 5905 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5906 } 5907 } else { 5908 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5909 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5910 PC coarse_pc; 5911 5912 if (nullsp) { 5913 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5914 } 5915 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5916 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5917 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5918 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5919 } else { 5920 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5921 ierr = KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);CHKERRQ(ierr); 5922 if (nullsp) { 5923 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5924 } 5925 } 5926 } 5927 /* we don't need the benign correction at coarser levels anymore */ 5928 if (pcbddc->benign_have_null && isbddc) { 5929 PC coarse_pc; 5930 PC_BDDC* coarsepcbddc; 5931 5932 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5933 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5934 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5935 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5936 } 5937 } 5938 5939 /* Local solution on R nodes */ 5940 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5941 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5942 } 5943 /* communications from coarse sol to local primal nodes */ 5944 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5945 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5946 5947 /* Sum contributions from the two levels */ 5948 if (!pcbddc->benign_apply_coarse_only) { 5949 if (applytranspose) { 5950 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5951 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5952 } else { 5953 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5954 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5955 } 5956 /* store p0 */ 5957 if (pcbddc->benign_n) { 5958 PetscScalar *array; 5959 PetscInt j; 5960 5961 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5962 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5963 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5964 } 5965 } else { /* expand the coarse solution */ 5966 if (applytranspose) { 5967 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5968 } else { 5969 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5970 } 5971 } 5972 PetscFunctionReturn(0); 5973 } 5974 5975 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5976 { 5977 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5978 Vec from,to; 5979 const PetscScalar *array; 5980 PetscErrorCode ierr; 5981 5982 PetscFunctionBegin; 5983 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5984 from = pcbddc->coarse_vec; 5985 to = pcbddc->vec1_P; 5986 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5987 Vec tvec; 5988 5989 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5990 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5991 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5992 ierr = VecGetArrayRead(tvec,&array);CHKERRQ(ierr); 5993 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5994 ierr = VecRestoreArrayRead(tvec,&array);CHKERRQ(ierr); 5995 } 5996 } else { /* from local to global -> put data in coarse right hand side */ 5997 from = pcbddc->vec1_P; 5998 to = pcbddc->coarse_vec; 5999 } 6000 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 6001 PetscFunctionReturn(0); 6002 } 6003 6004 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 6005 { 6006 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 6007 Vec from,to; 6008 const PetscScalar *array; 6009 PetscErrorCode ierr; 6010 6011 PetscFunctionBegin; 6012 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6013 from = pcbddc->coarse_vec; 6014 to = pcbddc->vec1_P; 6015 } else { /* from local to global -> put data in coarse right hand side */ 6016 from = pcbddc->vec1_P; 6017 to = pcbddc->coarse_vec; 6018 } 6019 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 6020 if (smode == SCATTER_FORWARD) { 6021 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6022 Vec tvec; 6023 6024 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 6025 ierr = VecGetArrayRead(to,&array);CHKERRQ(ierr); 6026 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 6027 ierr = VecRestoreArrayRead(to,&array);CHKERRQ(ierr); 6028 } 6029 } else { 6030 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6031 ierr = VecResetArray(from);CHKERRQ(ierr); 6032 } 6033 } 6034 PetscFunctionReturn(0); 6035 } 6036 6037 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6038 { 6039 PetscErrorCode ierr; 6040 PC_IS* pcis = (PC_IS*)(pc->data); 6041 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6042 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6043 /* one and zero */ 6044 PetscScalar one=1.0,zero=0.0; 6045 /* space to store constraints and their local indices */ 6046 PetscScalar *constraints_data; 6047 PetscInt *constraints_idxs,*constraints_idxs_B; 6048 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 6049 PetscInt *constraints_n; 6050 /* iterators */ 6051 PetscInt i,j,k,total_counts,total_counts_cc,cum; 6052 /* BLAS integers */ 6053 PetscBLASInt lwork,lierr; 6054 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 6055 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 6056 /* reuse */ 6057 PetscInt olocal_primal_size,olocal_primal_size_cc; 6058 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 6059 /* change of basis */ 6060 PetscBool qr_needed; 6061 PetscBT change_basis,qr_needed_idx; 6062 /* auxiliary stuff */ 6063 PetscInt *nnz,*is_indices; 6064 PetscInt ncc; 6065 /* some quantities */ 6066 PetscInt n_vertices,total_primal_vertices,valid_constraints; 6067 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 6068 PetscReal tol; /* tolerance for retaining eigenmodes */ 6069 6070 PetscFunctionBegin; 6071 tol = PetscSqrtReal(PETSC_SMALL); 6072 /* Destroy Mat objects computed previously */ 6073 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6074 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6075 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 6076 /* save info on constraints from previous setup (if any) */ 6077 olocal_primal_size = pcbddc->local_primal_size; 6078 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6079 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 6080 ierr = PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc);CHKERRQ(ierr); 6081 ierr = PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc);CHKERRQ(ierr); 6082 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6083 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6084 6085 if (!pcbddc->adaptive_selection) { 6086 IS ISForVertices,*ISForFaces,*ISForEdges; 6087 MatNullSpace nearnullsp; 6088 const Vec *nearnullvecs; 6089 Vec *localnearnullsp; 6090 PetscScalar *array; 6091 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 6092 PetscBool nnsp_has_cnst; 6093 /* LAPACK working arrays for SVD or POD */ 6094 PetscBool skip_lapack,boolforchange; 6095 PetscScalar *work; 6096 PetscReal *singular_vals; 6097 #if defined(PETSC_USE_COMPLEX) 6098 PetscReal *rwork; 6099 #endif 6100 PetscScalar *temp_basis = NULL,*correlation_mat = NULL; 6101 PetscBLASInt dummy_int=1; 6102 PetscScalar dummy_scalar=1.; 6103 PetscBool use_pod = PETSC_FALSE; 6104 6105 /* MKL SVD with same input gives different results on different processes! */ 6106 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL) 6107 use_pod = PETSC_TRUE; 6108 #endif 6109 /* Get index sets for faces, edges and vertices from graph */ 6110 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 6111 /* print some info */ 6112 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6113 PetscInt nv; 6114 6115 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6116 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 6117 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6118 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6119 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6120 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 6121 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 6122 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6123 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6124 } 6125 6126 /* free unneeded index sets */ 6127 if (!pcbddc->use_vertices) { 6128 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6129 } 6130 if (!pcbddc->use_edges) { 6131 for (i=0;i<n_ISForEdges;i++) { 6132 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6133 } 6134 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6135 n_ISForEdges = 0; 6136 } 6137 if (!pcbddc->use_faces) { 6138 for (i=0;i<n_ISForFaces;i++) { 6139 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6140 } 6141 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6142 n_ISForFaces = 0; 6143 } 6144 6145 /* check if near null space is attached to global mat */ 6146 if (pcbddc->use_nnsp) { 6147 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 6148 } else nearnullsp = NULL; 6149 6150 if (nearnullsp) { 6151 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 6152 /* remove any stored info */ 6153 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 6154 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 6155 /* store information for BDDC solver reuse */ 6156 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 6157 pcbddc->onearnullspace = nearnullsp; 6158 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 6159 for (i=0;i<nnsp_size;i++) { 6160 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 6161 } 6162 } else { /* if near null space is not provided BDDC uses constants by default */ 6163 nnsp_size = 0; 6164 nnsp_has_cnst = PETSC_TRUE; 6165 } 6166 /* get max number of constraints on a single cc */ 6167 max_constraints = nnsp_size; 6168 if (nnsp_has_cnst) max_constraints++; 6169 6170 /* 6171 Evaluate maximum storage size needed by the procedure 6172 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6173 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6174 There can be multiple constraints per connected component 6175 */ 6176 n_vertices = 0; 6177 if (ISForVertices) { 6178 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 6179 } 6180 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 6181 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 6182 6183 total_counts = n_ISForFaces+n_ISForEdges; 6184 total_counts *= max_constraints; 6185 total_counts += n_vertices; 6186 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 6187 6188 total_counts = 0; 6189 max_size_of_constraint = 0; 6190 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 6191 IS used_is; 6192 if (i<n_ISForEdges) { 6193 used_is = ISForEdges[i]; 6194 } else { 6195 used_is = ISForFaces[i-n_ISForEdges]; 6196 } 6197 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 6198 total_counts += j; 6199 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 6200 } 6201 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 6202 6203 /* get local part of global near null space vectors */ 6204 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 6205 for (k=0;k<nnsp_size;k++) { 6206 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 6207 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6208 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6209 } 6210 6211 /* whether or not to skip lapack calls */ 6212 skip_lapack = PETSC_TRUE; 6213 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6214 6215 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6216 if (!skip_lapack) { 6217 PetscScalar temp_work; 6218 6219 if (use_pod) { 6220 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6221 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 6222 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 6223 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 6224 #if defined(PETSC_USE_COMPLEX) 6225 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 6226 #endif 6227 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6228 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6229 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 6230 lwork = -1; 6231 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6232 #if !defined(PETSC_USE_COMPLEX) 6233 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 6234 #else 6235 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 6236 #endif 6237 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6238 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 6239 } else { 6240 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6241 /* SVD */ 6242 PetscInt max_n,min_n; 6243 max_n = max_size_of_constraint; 6244 min_n = max_constraints; 6245 if (max_size_of_constraint < max_constraints) { 6246 min_n = max_size_of_constraint; 6247 max_n = max_constraints; 6248 } 6249 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 6250 #if defined(PETSC_USE_COMPLEX) 6251 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 6252 #endif 6253 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6254 lwork = -1; 6255 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 6256 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 6257 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 6258 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6259 #if !defined(PETSC_USE_COMPLEX) 6260 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 6261 #else 6262 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 6263 #endif 6264 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6265 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 6266 #else 6267 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6268 #endif /* on missing GESVD */ 6269 } 6270 /* Allocate optimal workspace */ 6271 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 6272 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 6273 } 6274 /* Now we can loop on constraining sets */ 6275 total_counts = 0; 6276 constraints_idxs_ptr[0] = 0; 6277 constraints_data_ptr[0] = 0; 6278 /* vertices */ 6279 if (n_vertices) { 6280 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6281 ierr = PetscArraycpy(constraints_idxs,is_indices,n_vertices);CHKERRQ(ierr); 6282 for (i=0;i<n_vertices;i++) { 6283 constraints_n[total_counts] = 1; 6284 constraints_data[total_counts] = 1.0; 6285 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 6286 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 6287 total_counts++; 6288 } 6289 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6290 n_vertices = total_counts; 6291 } 6292 6293 /* edges and faces */ 6294 total_counts_cc = total_counts; 6295 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6296 IS used_is; 6297 PetscBool idxs_copied = PETSC_FALSE; 6298 6299 if (ncc<n_ISForEdges) { 6300 used_is = ISForEdges[ncc]; 6301 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6302 } else { 6303 used_is = ISForFaces[ncc-n_ISForEdges]; 6304 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6305 } 6306 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6307 6308 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 6309 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6310 /* change of basis should not be performed on local periodic nodes */ 6311 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6312 if (nnsp_has_cnst) { 6313 PetscScalar quad_value; 6314 6315 ierr = PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);CHKERRQ(ierr); 6316 idxs_copied = PETSC_TRUE; 6317 6318 if (!pcbddc->use_nnsp_true) { 6319 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6320 } else { 6321 quad_value = 1.0; 6322 } 6323 for (j=0;j<size_of_constraint;j++) { 6324 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6325 } 6326 temp_constraints++; 6327 total_counts++; 6328 } 6329 for (k=0;k<nnsp_size;k++) { 6330 PetscReal real_value; 6331 PetscScalar *ptr_to_data; 6332 6333 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6334 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6335 for (j=0;j<size_of_constraint;j++) { 6336 ptr_to_data[j] = array[is_indices[j]]; 6337 } 6338 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6339 /* check if array is null on the connected component */ 6340 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6341 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6342 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6343 temp_constraints++; 6344 total_counts++; 6345 if (!idxs_copied) { 6346 ierr = PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);CHKERRQ(ierr); 6347 idxs_copied = PETSC_TRUE; 6348 } 6349 } 6350 } 6351 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6352 valid_constraints = temp_constraints; 6353 if (!pcbddc->use_nnsp_true && temp_constraints) { 6354 if (temp_constraints == 1) { /* just normalize the constraint */ 6355 PetscScalar norm,*ptr_to_data; 6356 6357 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6358 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6359 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6360 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6361 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6362 } else { /* perform SVD */ 6363 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6364 6365 if (use_pod) { 6366 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6367 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6368 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6369 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6370 from that computed using LAPACKgesvd 6371 -> This is due to a different computation of eigenvectors in LAPACKheev 6372 -> The quality of the POD-computed basis will be the same */ 6373 ierr = PetscArrayzero(correlation_mat,temp_constraints*temp_constraints);CHKERRQ(ierr); 6374 /* Store upper triangular part of correlation matrix */ 6375 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6376 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6377 for (j=0;j<temp_constraints;j++) { 6378 for (k=0;k<j+1;k++) { 6379 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6380 } 6381 } 6382 /* compute eigenvalues and eigenvectors of correlation matrix */ 6383 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6384 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 6385 #if !defined(PETSC_USE_COMPLEX) 6386 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6387 #else 6388 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6389 #endif 6390 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6391 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6392 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6393 j = 0; 6394 while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++; 6395 total_counts = total_counts-j; 6396 valid_constraints = temp_constraints-j; 6397 /* scale and copy POD basis into used quadrature memory */ 6398 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6399 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6400 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 6401 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6402 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 6403 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6404 if (j<temp_constraints) { 6405 PetscInt ii; 6406 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6407 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6408 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6409 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6410 for (k=0;k<temp_constraints-j;k++) { 6411 for (ii=0;ii<size_of_constraint;ii++) { 6412 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6413 } 6414 } 6415 } 6416 } else { 6417 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6418 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6419 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6420 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6421 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6422 #if !defined(PETSC_USE_COMPLEX) 6423 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6424 #else 6425 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6426 #endif 6427 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6428 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6429 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6430 k = temp_constraints; 6431 if (k > size_of_constraint) k = size_of_constraint; 6432 j = 0; 6433 while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++; 6434 valid_constraints = k-j; 6435 total_counts = total_counts-temp_constraints+valid_constraints; 6436 #else 6437 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6438 #endif /* on missing GESVD */ 6439 } 6440 } 6441 } 6442 /* update pointers information */ 6443 if (valid_constraints) { 6444 constraints_n[total_counts_cc] = valid_constraints; 6445 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6446 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6447 /* set change_of_basis flag */ 6448 if (boolforchange) { 6449 PetscBTSet(change_basis,total_counts_cc); 6450 } 6451 total_counts_cc++; 6452 } 6453 } 6454 /* free workspace */ 6455 if (!skip_lapack) { 6456 ierr = PetscFree(work);CHKERRQ(ierr); 6457 #if defined(PETSC_USE_COMPLEX) 6458 ierr = PetscFree(rwork);CHKERRQ(ierr); 6459 #endif 6460 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 6461 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 6462 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 6463 } 6464 for (k=0;k<nnsp_size;k++) { 6465 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 6466 } 6467 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 6468 /* free index sets of faces, edges and vertices */ 6469 for (i=0;i<n_ISForFaces;i++) { 6470 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6471 } 6472 if (n_ISForFaces) { 6473 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6474 } 6475 for (i=0;i<n_ISForEdges;i++) { 6476 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6477 } 6478 if (n_ISForEdges) { 6479 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6480 } 6481 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6482 } else { 6483 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6484 6485 total_counts = 0; 6486 n_vertices = 0; 6487 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6488 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6489 } 6490 max_constraints = 0; 6491 total_counts_cc = 0; 6492 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6493 total_counts += pcbddc->adaptive_constraints_n[i]; 6494 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6495 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6496 } 6497 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6498 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6499 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6500 constraints_data = pcbddc->adaptive_constraints_data; 6501 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6502 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 6503 total_counts_cc = 0; 6504 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6505 if (pcbddc->adaptive_constraints_n[i]) { 6506 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6507 } 6508 } 6509 6510 max_size_of_constraint = 0; 6511 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6512 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 6513 /* Change of basis */ 6514 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 6515 if (pcbddc->use_change_of_basis) { 6516 for (i=0;i<sub_schurs->n_subs;i++) { 6517 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6518 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 6519 } 6520 } 6521 } 6522 } 6523 pcbddc->local_primal_size = total_counts; 6524 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6525 6526 /* map constraints_idxs in boundary numbering */ 6527 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6528 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i); 6529 6530 /* Create constraint matrix */ 6531 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6532 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6533 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6534 6535 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6536 /* determine if a QR strategy is needed for change of basis */ 6537 qr_needed = pcbddc->use_qr_single; 6538 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6539 total_primal_vertices=0; 6540 pcbddc->local_primal_size_cc = 0; 6541 for (i=0;i<total_counts_cc;i++) { 6542 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6543 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6544 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6545 pcbddc->local_primal_size_cc += 1; 6546 } else if (PetscBTLookup(change_basis,i)) { 6547 for (k=0;k<constraints_n[i];k++) { 6548 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6549 } 6550 pcbddc->local_primal_size_cc += constraints_n[i]; 6551 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6552 PetscBTSet(qr_needed_idx,i); 6553 qr_needed = PETSC_TRUE; 6554 } 6555 } else { 6556 pcbddc->local_primal_size_cc += 1; 6557 } 6558 } 6559 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6560 pcbddc->n_vertices = total_primal_vertices; 6561 /* permute indices in order to have a sorted set of vertices */ 6562 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6563 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6564 ierr = PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices);CHKERRQ(ierr); 6565 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6566 6567 /* nonzero structure of constraint matrix */ 6568 /* and get reference dof for local constraints */ 6569 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6570 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6571 6572 j = total_primal_vertices; 6573 total_counts = total_primal_vertices; 6574 cum = total_primal_vertices; 6575 for (i=n_vertices;i<total_counts_cc;i++) { 6576 if (!PetscBTLookup(change_basis,i)) { 6577 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6578 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6579 cum++; 6580 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6581 for (k=0;k<constraints_n[i];k++) { 6582 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6583 nnz[j+k] = size_of_constraint; 6584 } 6585 j += constraints_n[i]; 6586 } 6587 } 6588 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6589 ierr = MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6590 ierr = PetscFree(nnz);CHKERRQ(ierr); 6591 6592 /* set values in constraint matrix */ 6593 for (i=0;i<total_primal_vertices;i++) { 6594 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6595 } 6596 total_counts = total_primal_vertices; 6597 for (i=n_vertices;i<total_counts_cc;i++) { 6598 if (!PetscBTLookup(change_basis,i)) { 6599 PetscInt *cols; 6600 6601 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6602 cols = constraints_idxs+constraints_idxs_ptr[i]; 6603 for (k=0;k<constraints_n[i];k++) { 6604 PetscInt row = total_counts+k; 6605 PetscScalar *vals; 6606 6607 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6608 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6609 } 6610 total_counts += constraints_n[i]; 6611 } 6612 } 6613 /* assembling */ 6614 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6615 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6616 ierr = MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view");CHKERRQ(ierr); 6617 6618 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6619 if (pcbddc->use_change_of_basis) { 6620 /* dual and primal dofs on a single cc */ 6621 PetscInt dual_dofs,primal_dofs; 6622 /* working stuff for GEQRF */ 6623 PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t; 6624 PetscBLASInt lqr_work; 6625 /* working stuff for UNGQR */ 6626 PetscScalar *gqr_work = NULL,lgqr_work_t=0.0; 6627 PetscBLASInt lgqr_work; 6628 /* working stuff for TRTRS */ 6629 PetscScalar *trs_rhs = NULL; 6630 PetscBLASInt Blas_NRHS; 6631 /* pointers for values insertion into change of basis matrix */ 6632 PetscInt *start_rows,*start_cols; 6633 PetscScalar *start_vals; 6634 /* working stuff for values insertion */ 6635 PetscBT is_primal; 6636 PetscInt *aux_primal_numbering_B; 6637 /* matrix sizes */ 6638 PetscInt global_size,local_size; 6639 /* temporary change of basis */ 6640 Mat localChangeOfBasisMatrix; 6641 /* extra space for debugging */ 6642 PetscScalar *dbg_work = NULL; 6643 6644 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6645 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6646 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6647 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6648 /* nonzeros for local mat */ 6649 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6650 if (!pcbddc->benign_change || pcbddc->fake_change) { 6651 for (i=0;i<pcis->n;i++) nnz[i]=1; 6652 } else { 6653 const PetscInt *ii; 6654 PetscInt n; 6655 PetscBool flg_row; 6656 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6657 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6658 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6659 } 6660 for (i=n_vertices;i<total_counts_cc;i++) { 6661 if (PetscBTLookup(change_basis,i)) { 6662 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6663 if (PetscBTLookup(qr_needed_idx,i)) { 6664 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6665 } else { 6666 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6667 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6668 } 6669 } 6670 } 6671 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6672 ierr = MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6673 ierr = PetscFree(nnz);CHKERRQ(ierr); 6674 /* Set interior change in the matrix */ 6675 if (!pcbddc->benign_change || pcbddc->fake_change) { 6676 for (i=0;i<pcis->n;i++) { 6677 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6678 } 6679 } else { 6680 const PetscInt *ii,*jj; 6681 PetscScalar *aa; 6682 PetscInt n; 6683 PetscBool flg_row; 6684 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6685 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6686 for (i=0;i<n;i++) { 6687 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6688 } 6689 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6690 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6691 } 6692 6693 if (pcbddc->dbg_flag) { 6694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6695 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6696 } 6697 6698 6699 /* Now we loop on the constraints which need a change of basis */ 6700 /* 6701 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6702 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6703 6704 Basic blocks of change of basis matrix T computed by 6705 6706 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6707 6708 | 1 0 ... 0 s_1/S | 6709 | 0 1 ... 0 s_2/S | 6710 | ... | 6711 | 0 ... 1 s_{n-1}/S | 6712 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6713 6714 with S = \sum_{i=1}^n s_i^2 6715 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6716 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6717 6718 - QR decomposition of constraints otherwise 6719 */ 6720 if (qr_needed && max_size_of_constraint) { 6721 /* space to store Q */ 6722 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6723 /* array to store scaling factors for reflectors */ 6724 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6725 /* first we issue queries for optimal work */ 6726 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6727 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6728 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6729 lqr_work = -1; 6730 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6731 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6732 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6733 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6734 lgqr_work = -1; 6735 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6736 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6737 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6738 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6739 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6740 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6741 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6742 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6743 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6744 /* array to store rhs and solution of triangular solver */ 6745 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6746 /* allocating workspace for check */ 6747 if (pcbddc->dbg_flag) { 6748 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6749 } 6750 } 6751 /* array to store whether a node is primal or not */ 6752 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6753 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6754 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6755 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i); 6756 for (i=0;i<total_primal_vertices;i++) { 6757 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6758 } 6759 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6760 6761 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6762 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6763 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6764 if (PetscBTLookup(change_basis,total_counts)) { 6765 /* get constraint info */ 6766 primal_dofs = constraints_n[total_counts]; 6767 dual_dofs = size_of_constraint-primal_dofs; 6768 6769 if (pcbddc->dbg_flag) { 6770 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6771 } 6772 6773 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6774 6775 /* copy quadrature constraints for change of basis check */ 6776 if (pcbddc->dbg_flag) { 6777 ierr = PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6778 } 6779 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6780 ierr = PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6781 6782 /* compute QR decomposition of constraints */ 6783 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6784 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6785 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6786 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6787 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6788 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6789 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6790 6791 /* explictly compute R^-T */ 6792 ierr = PetscArrayzero(trs_rhs,primal_dofs*primal_dofs);CHKERRQ(ierr); 6793 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6794 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6795 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6796 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6797 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6798 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6799 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6800 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6801 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6802 6803 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6804 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6805 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6806 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6807 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6808 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6809 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6810 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6811 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6812 6813 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6814 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6815 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6816 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6817 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6818 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6819 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6820 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6821 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6822 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6823 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6824 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6825 ierr = PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6826 6827 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6828 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6829 /* insert cols for primal dofs */ 6830 for (j=0;j<primal_dofs;j++) { 6831 start_vals = &qr_basis[j*size_of_constraint]; 6832 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6833 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6834 } 6835 /* insert cols for dual dofs */ 6836 for (j=0,k=0;j<dual_dofs;k++) { 6837 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6838 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6839 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6840 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6841 j++; 6842 } 6843 } 6844 6845 /* check change of basis */ 6846 if (pcbddc->dbg_flag) { 6847 PetscInt ii,jj; 6848 PetscBool valid_qr=PETSC_TRUE; 6849 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6850 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6851 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6852 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6853 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6854 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6855 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6856 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6857 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6858 for (jj=0;jj<size_of_constraint;jj++) { 6859 for (ii=0;ii<primal_dofs;ii++) { 6860 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6861 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6862 } 6863 } 6864 if (!valid_qr) { 6865 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6866 for (jj=0;jj<size_of_constraint;jj++) { 6867 for (ii=0;ii<primal_dofs;ii++) { 6868 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6869 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6870 } 6871 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6872 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6873 } 6874 } 6875 } 6876 } else { 6877 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6878 } 6879 } 6880 } else { /* simple transformation block */ 6881 PetscInt row,col; 6882 PetscScalar val,norm; 6883 6884 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6885 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6886 for (j=0;j<size_of_constraint;j++) { 6887 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6888 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6889 if (!PetscBTLookup(is_primal,row_B)) { 6890 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6891 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6892 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6893 } else { 6894 for (k=0;k<size_of_constraint;k++) { 6895 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6896 if (row != col) { 6897 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6898 } else { 6899 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6900 } 6901 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6902 } 6903 } 6904 } 6905 if (pcbddc->dbg_flag) { 6906 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6907 } 6908 } 6909 } else { 6910 if (pcbddc->dbg_flag) { 6911 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6912 } 6913 } 6914 } 6915 6916 /* free workspace */ 6917 if (qr_needed) { 6918 if (pcbddc->dbg_flag) { 6919 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6920 } 6921 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6922 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6923 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6924 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6925 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6926 } 6927 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6928 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6929 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6930 6931 /* assembling of global change of variable */ 6932 if (!pcbddc->fake_change) { 6933 Mat tmat; 6934 PetscInt bs; 6935 6936 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6937 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6938 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6939 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6940 ierr = MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6941 ierr = MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6942 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6943 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6944 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6945 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6946 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6947 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6948 ierr = MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6949 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6950 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6951 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6952 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6953 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6954 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6955 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6956 6957 /* check */ 6958 if (pcbddc->dbg_flag) { 6959 PetscReal error; 6960 Vec x,x_change; 6961 6962 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6963 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6964 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6965 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6966 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6967 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6968 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6969 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6970 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6971 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6972 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6973 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6974 if (error > PETSC_SMALL) { 6975 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 6976 } 6977 ierr = VecDestroy(&x);CHKERRQ(ierr); 6978 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6979 } 6980 /* adapt sub_schurs computed (if any) */ 6981 if (pcbddc->use_deluxe_scaling) { 6982 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6983 6984 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6985 if (sub_schurs && sub_schurs->S_Ej_all) { 6986 Mat S_new,tmat; 6987 IS is_all_N,is_V_Sall = NULL; 6988 6989 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6990 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6991 if (pcbddc->deluxe_zerorows) { 6992 ISLocalToGlobalMapping NtoSall; 6993 IS is_V; 6994 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6995 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6996 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6997 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6998 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6999 } 7000 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 7001 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 7002 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 7003 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 7004 if (pcbddc->deluxe_zerorows) { 7005 const PetscScalar *array; 7006 const PetscInt *idxs_V,*idxs_all; 7007 PetscInt i,n_V; 7008 7009 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 7010 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 7011 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 7012 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 7013 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 7014 for (i=0;i<n_V;i++) { 7015 PetscScalar val; 7016 PetscInt idx; 7017 7018 idx = idxs_V[i]; 7019 val = array[idxs_all[idxs_V[i]]]; 7020 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 7021 } 7022 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7023 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7024 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 7025 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 7026 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 7027 } 7028 sub_schurs->S_Ej_all = S_new; 7029 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 7030 if (sub_schurs->sum_S_Ej_all) { 7031 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 7032 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 7033 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 7034 if (pcbddc->deluxe_zerorows) { 7035 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 7036 } 7037 sub_schurs->sum_S_Ej_all = S_new; 7038 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 7039 } 7040 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 7041 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 7042 } 7043 /* destroy any change of basis context in sub_schurs */ 7044 if (sub_schurs && sub_schurs->change) { 7045 PetscInt i; 7046 7047 for (i=0;i<sub_schurs->n_subs;i++) { 7048 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 7049 } 7050 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 7051 } 7052 } 7053 if (pcbddc->switch_static) { /* need to save the local change */ 7054 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7055 } else { 7056 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 7057 } 7058 /* determine if any process has changed the pressures locally */ 7059 pcbddc->change_interior = pcbddc->benign_have_null; 7060 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7061 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 7062 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7063 pcbddc->use_qr_single = qr_needed; 7064 } 7065 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7066 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7067 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 7068 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7069 } else { 7070 Mat benign_global = NULL; 7071 if (pcbddc->benign_have_null) { 7072 Mat M; 7073 7074 pcbddc->change_interior = PETSC_TRUE; 7075 ierr = VecCopy(matis->counter,pcis->vec1_N);CHKERRQ(ierr); 7076 ierr = VecReciprocal(pcis->vec1_N);CHKERRQ(ierr); 7077 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);CHKERRQ(ierr); 7078 if (pcbddc->benign_change) { 7079 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 7080 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 7081 } else { 7082 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);CHKERRQ(ierr); 7083 ierr = MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);CHKERRQ(ierr); 7084 } 7085 ierr = MatISSetLocalMat(benign_global,M);CHKERRQ(ierr); 7086 ierr = MatDestroy(&M);CHKERRQ(ierr); 7087 ierr = MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7088 ierr = MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7089 } 7090 if (pcbddc->user_ChangeOfBasisMatrix) { 7091 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 7092 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 7093 } else if (pcbddc->benign_have_null) { 7094 pcbddc->ChangeOfBasisMatrix = benign_global; 7095 } 7096 } 7097 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7098 IS is_global; 7099 const PetscInt *gidxs; 7100 7101 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 7102 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 7103 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 7104 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 7105 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 7106 } 7107 } 7108 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 7109 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 7110 } 7111 7112 if (!pcbddc->fake_change) { 7113 /* add pressure dofs to set of primal nodes for numbering purposes */ 7114 for (i=0;i<pcbddc->benign_n;i++) { 7115 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7116 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7117 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7118 pcbddc->local_primal_size_cc++; 7119 pcbddc->local_primal_size++; 7120 } 7121 7122 /* check if a new primal space has been introduced (also take into account benign trick) */ 7123 pcbddc->new_primal_space_local = PETSC_TRUE; 7124 if (olocal_primal_size == pcbddc->local_primal_size) { 7125 ierr = PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 7126 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7127 if (!pcbddc->new_primal_space_local) { 7128 ierr = PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 7129 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7130 } 7131 } 7132 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7133 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7134 } 7135 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 7136 7137 /* flush dbg viewer */ 7138 if (pcbddc->dbg_flag) { 7139 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7140 } 7141 7142 /* free workspace */ 7143 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 7144 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 7145 if (!pcbddc->adaptive_selection) { 7146 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 7147 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 7148 } else { 7149 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 7150 pcbddc->adaptive_constraints_idxs_ptr, 7151 pcbddc->adaptive_constraints_data_ptr, 7152 pcbddc->adaptive_constraints_idxs, 7153 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 7154 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 7155 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 7156 } 7157 PetscFunctionReturn(0); 7158 } 7159 7160 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7161 { 7162 ISLocalToGlobalMapping map; 7163 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7164 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 7165 PetscInt i,N; 7166 PetscBool rcsr = PETSC_FALSE; 7167 PetscErrorCode ierr; 7168 7169 PetscFunctionBegin; 7170 if (pcbddc->recompute_topography) { 7171 pcbddc->graphanalyzed = PETSC_FALSE; 7172 /* Reset previously computed graph */ 7173 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 7174 /* Init local Graph struct */ 7175 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 7176 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 7177 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 7178 7179 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 7180 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 7181 } 7182 /* Check validity of the csr graph passed in by the user */ 7183 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 7184 7185 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7186 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7187 PetscInt *xadj,*adjncy; 7188 PetscInt nvtxs; 7189 PetscBool flg_row=PETSC_FALSE; 7190 7191 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 7192 if (flg_row) { 7193 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 7194 pcbddc->computed_rowadj = PETSC_TRUE; 7195 } 7196 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 7197 rcsr = PETSC_TRUE; 7198 } 7199 if (pcbddc->dbg_flag) { 7200 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7201 } 7202 7203 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7204 PetscReal *lcoords; 7205 PetscInt n; 7206 MPI_Datatype dimrealtype; 7207 7208 /* TODO: support for blocked */ 7209 if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 7210 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 7211 ierr = PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);CHKERRQ(ierr); 7212 ierr = MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);CHKERRQ(ierr); 7213 ierr = MPI_Type_commit(&dimrealtype);CHKERRQ(ierr); 7214 ierr = PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 7215 ierr = PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 7216 ierr = MPI_Type_free(&dimrealtype);CHKERRQ(ierr); 7217 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 7218 7219 pcbddc->mat_graph->coords = lcoords; 7220 pcbddc->mat_graph->cloc = PETSC_TRUE; 7221 pcbddc->mat_graph->cnloc = n; 7222 } 7223 if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 7224 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected); 7225 7226 /* Setup of Graph */ 7227 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 7228 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 7229 7230 /* attach info on disconnected subdomains if present */ 7231 if (pcbddc->n_local_subs) { 7232 PetscInt *local_subs,n,totn; 7233 7234 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 7235 ierr = PetscMalloc1(n,&local_subs);CHKERRQ(ierr); 7236 for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs; 7237 for (i=0;i<pcbddc->n_local_subs;i++) { 7238 const PetscInt *idxs; 7239 PetscInt nl,j; 7240 7241 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 7242 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 7243 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 7244 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 7245 } 7246 for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]); 7247 pcbddc->mat_graph->n_local_subs = totn + 1; 7248 pcbddc->mat_graph->local_subs = local_subs; 7249 } 7250 } 7251 7252 if (!pcbddc->graphanalyzed) { 7253 /* Graph's connected components analysis */ 7254 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 7255 pcbddc->graphanalyzed = PETSC_TRUE; 7256 pcbddc->corner_selected = pcbddc->corner_selection; 7257 } 7258 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7259 PetscFunctionReturn(0); 7260 } 7261 7262 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7263 { 7264 PetscInt i,j,n; 7265 PetscScalar *alphas; 7266 PetscReal norm,*onorms; 7267 PetscErrorCode ierr; 7268 7269 PetscFunctionBegin; 7270 n = *nio; 7271 if (!n) PetscFunctionReturn(0); 7272 ierr = PetscMalloc2(n,&alphas,n,&onorms);CHKERRQ(ierr); 7273 ierr = VecNormalize(vecs[0],&norm);CHKERRQ(ierr); 7274 if (norm < PETSC_SMALL) { 7275 onorms[0] = 0.0; 7276 ierr = VecSet(vecs[0],0.0);CHKERRQ(ierr); 7277 } else { 7278 onorms[0] = norm; 7279 } 7280 7281 for (i=1;i<n;i++) { 7282 ierr = VecMDot(vecs[i],i,vecs,alphas);CHKERRQ(ierr); 7283 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 7284 ierr = VecMAXPY(vecs[i],i,alphas,vecs);CHKERRQ(ierr); 7285 ierr = VecNormalize(vecs[i],&norm);CHKERRQ(ierr); 7286 if (norm < PETSC_SMALL) { 7287 onorms[i] = 0.0; 7288 ierr = VecSet(vecs[i],0.0);CHKERRQ(ierr); 7289 } else { 7290 onorms[i] = norm; 7291 } 7292 } 7293 /* push nonzero vectors at the beginning */ 7294 for (i=0;i<n;i++) { 7295 if (onorms[i] == 0.0) { 7296 for (j=i+1;j<n;j++) { 7297 if (onorms[j] != 0.0) { 7298 ierr = VecCopy(vecs[j],vecs[i]);CHKERRQ(ierr); 7299 onorms[j] = 0.0; 7300 } 7301 } 7302 } 7303 } 7304 for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7305 ierr = PetscFree2(alphas,onorms);CHKERRQ(ierr); 7306 PetscFunctionReturn(0); 7307 } 7308 7309 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7310 { 7311 Mat A; 7312 PetscInt n_neighs,*neighs,*n_shared,**shared; 7313 PetscMPIInt size,rank,color; 7314 PetscInt *xadj,*adjncy; 7315 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7316 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7317 PetscInt void_procs,*procs_candidates = NULL; 7318 PetscInt xadj_count,*count; 7319 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7320 PetscSubcomm psubcomm; 7321 MPI_Comm subcomm; 7322 PetscErrorCode ierr; 7323 7324 PetscFunctionBegin; 7325 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7326 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7327 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7328 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7329 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7330 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains); 7331 7332 if (have_void) *have_void = PETSC_FALSE; 7333 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 7334 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 7335 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 7336 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 7337 im_active = !!n; 7338 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7339 void_procs = size - active_procs; 7340 /* get ranks of of non-active processes in mat communicator */ 7341 if (void_procs) { 7342 PetscInt ncand; 7343 7344 if (have_void) *have_void = PETSC_TRUE; 7345 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 7346 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7347 for (i=0,ncand=0;i<size;i++) { 7348 if (!procs_candidates[i]) { 7349 procs_candidates[ncand++] = i; 7350 } 7351 } 7352 /* force n_subdomains to be not greater that the number of non-active processes */ 7353 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7354 } 7355 7356 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7357 number of subdomains requested 1 -> send to master or first candidate in voids */ 7358 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 7359 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7360 PetscInt issize,isidx,dest; 7361 if (*n_subdomains == 1) dest = 0; 7362 else dest = rank; 7363 if (im_active) { 7364 issize = 1; 7365 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7366 isidx = procs_candidates[dest]; 7367 } else { 7368 isidx = dest; 7369 } 7370 } else { 7371 issize = 0; 7372 isidx = -1; 7373 } 7374 if (*n_subdomains != 1) *n_subdomains = active_procs; 7375 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 7376 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7377 PetscFunctionReturn(0); 7378 } 7379 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 7380 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 7381 threshold = PetscMax(threshold,2); 7382 7383 /* Get info on mapping */ 7384 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7385 7386 /* build local CSR graph of subdomains' connectivity */ 7387 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 7388 xadj[0] = 0; 7389 xadj[1] = PetscMax(n_neighs-1,0); 7390 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 7391 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 7392 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 7393 for (i=1;i<n_neighs;i++) 7394 for (j=0;j<n_shared[i];j++) 7395 count[shared[i][j]] += 1; 7396 7397 xadj_count = 0; 7398 for (i=1;i<n_neighs;i++) { 7399 for (j=0;j<n_shared[i];j++) { 7400 if (count[shared[i][j]] < threshold) { 7401 adjncy[xadj_count] = neighs[i]; 7402 adjncy_wgt[xadj_count] = n_shared[i]; 7403 xadj_count++; 7404 break; 7405 } 7406 } 7407 } 7408 xadj[1] = xadj_count; 7409 ierr = PetscFree(count);CHKERRQ(ierr); 7410 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7411 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7412 7413 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 7414 7415 /* Restrict work on active processes only */ 7416 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 7417 if (void_procs) { 7418 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 7419 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 7420 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 7421 subcomm = PetscSubcommChild(psubcomm); 7422 } else { 7423 psubcomm = NULL; 7424 subcomm = PetscObjectComm((PetscObject)mat); 7425 } 7426 7427 v_wgt = NULL; 7428 if (!color) { 7429 ierr = PetscFree(xadj);CHKERRQ(ierr); 7430 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7431 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7432 } else { 7433 Mat subdomain_adj; 7434 IS new_ranks,new_ranks_contig; 7435 MatPartitioning partitioner; 7436 PetscInt rstart=0,rend=0; 7437 PetscInt *is_indices,*oldranks; 7438 PetscMPIInt size; 7439 PetscBool aggregate; 7440 7441 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 7442 if (void_procs) { 7443 PetscInt prank = rank; 7444 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 7445 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 7446 for (i=0;i<xadj[1];i++) { 7447 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 7448 } 7449 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7450 } else { 7451 oldranks = NULL; 7452 } 7453 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7454 if (aggregate) { /* TODO: all this part could be made more efficient */ 7455 PetscInt lrows,row,ncols,*cols; 7456 PetscMPIInt nrank; 7457 PetscScalar *vals; 7458 7459 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 7460 lrows = 0; 7461 if (nrank<redprocs) { 7462 lrows = size/redprocs; 7463 if (nrank<size%redprocs) lrows++; 7464 } 7465 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 7466 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 7467 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7468 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7469 row = nrank; 7470 ncols = xadj[1]-xadj[0]; 7471 cols = adjncy; 7472 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 7473 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7474 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 7475 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7476 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7477 ierr = PetscFree(xadj);CHKERRQ(ierr); 7478 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7479 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7480 ierr = PetscFree(vals);CHKERRQ(ierr); 7481 if (use_vwgt) { 7482 Vec v; 7483 const PetscScalar *array; 7484 PetscInt nl; 7485 7486 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 7487 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 7488 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 7489 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 7490 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 7491 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 7492 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 7493 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7494 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 7495 ierr = VecDestroy(&v);CHKERRQ(ierr); 7496 } 7497 } else { 7498 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 7499 if (use_vwgt) { 7500 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 7501 v_wgt[0] = n; 7502 } 7503 } 7504 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 7505 7506 /* Partition */ 7507 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 7508 #if defined(PETSC_HAVE_PTSCOTCH) 7509 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH);CHKERRQ(ierr); 7510 #elif defined(PETSC_HAVE_PARMETIS) 7511 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS);CHKERRQ(ierr); 7512 #else 7513 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE);CHKERRQ(ierr); 7514 #endif 7515 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 7516 if (v_wgt) { 7517 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 7518 } 7519 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7520 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 7521 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 7522 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 7523 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 7524 7525 /* renumber new_ranks to avoid "holes" in new set of processors */ 7526 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 7527 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 7528 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7529 if (!aggregate) { 7530 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7531 #if defined(PETSC_USE_DEBUG) 7532 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7533 #endif 7534 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7535 } else if (oldranks) { 7536 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7537 } else { 7538 ranks_send_to_idx[0] = is_indices[0]; 7539 } 7540 } else { 7541 PetscInt idx = 0; 7542 PetscMPIInt tag; 7543 MPI_Request *reqs; 7544 7545 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 7546 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 7547 for (i=rstart;i<rend;i++) { 7548 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 7549 } 7550 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 7551 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7552 ierr = PetscFree(reqs);CHKERRQ(ierr); 7553 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7554 #if defined(PETSC_USE_DEBUG) 7555 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7556 #endif 7557 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7558 } else if (oldranks) { 7559 ranks_send_to_idx[0] = oldranks[idx]; 7560 } else { 7561 ranks_send_to_idx[0] = idx; 7562 } 7563 } 7564 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7565 /* clean up */ 7566 ierr = PetscFree(oldranks);CHKERRQ(ierr); 7567 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 7568 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 7569 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7570 } 7571 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7572 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7573 7574 /* assemble parallel IS for sends */ 7575 i = 1; 7576 if (!color) i=0; 7577 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7578 PetscFunctionReturn(0); 7579 } 7580 7581 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7582 7583 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7584 { 7585 Mat local_mat; 7586 IS is_sends_internal; 7587 PetscInt rows,cols,new_local_rows; 7588 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7589 PetscBool ismatis,isdense,newisdense,destroy_mat; 7590 ISLocalToGlobalMapping l2gmap; 7591 PetscInt* l2gmap_indices; 7592 const PetscInt* is_indices; 7593 MatType new_local_type; 7594 /* buffers */ 7595 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7596 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7597 PetscInt *recv_buffer_idxs_local; 7598 PetscScalar *ptr_vals,*recv_buffer_vals; 7599 const PetscScalar *send_buffer_vals; 7600 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7601 /* MPI */ 7602 MPI_Comm comm,comm_n; 7603 PetscSubcomm subcomm; 7604 PetscMPIInt n_sends,n_recvs,size; 7605 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7606 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7607 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7608 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7609 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7610 PetscErrorCode ierr; 7611 7612 PetscFunctionBegin; 7613 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7614 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7615 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7616 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7617 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7618 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7619 PetscValidLogicalCollectiveBool(mat,reuse,6); 7620 PetscValidLogicalCollectiveInt(mat,nis,8); 7621 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7622 if (nvecs) { 7623 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7624 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7625 } 7626 /* further checks */ 7627 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7628 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7629 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7630 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7631 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7632 if (reuse && *mat_n) { 7633 PetscInt mrows,mcols,mnrows,mncols; 7634 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7635 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7636 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7637 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7638 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7639 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7640 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7641 } 7642 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7643 PetscValidLogicalCollectiveInt(mat,bs,0); 7644 7645 /* prepare IS for sending if not provided */ 7646 if (!is_sends) { 7647 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7648 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7649 } else { 7650 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7651 is_sends_internal = is_sends; 7652 } 7653 7654 /* get comm */ 7655 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7656 7657 /* compute number of sends */ 7658 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7659 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7660 7661 /* compute number of receives */ 7662 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 7663 ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr); 7664 ierr = PetscArrayzero(iflags,size);CHKERRQ(ierr); 7665 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7666 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7667 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7668 ierr = PetscFree(iflags);CHKERRQ(ierr); 7669 7670 /* restrict comm if requested */ 7671 subcomm = 0; 7672 destroy_mat = PETSC_FALSE; 7673 if (restrict_comm) { 7674 PetscMPIInt color,subcommsize; 7675 7676 color = 0; 7677 if (restrict_full) { 7678 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7679 } else { 7680 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7681 } 7682 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7683 subcommsize = size - subcommsize; 7684 /* check if reuse has been requested */ 7685 if (reuse) { 7686 if (*mat_n) { 7687 PetscMPIInt subcommsize2; 7688 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7689 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7690 comm_n = PetscObjectComm((PetscObject)*mat_n); 7691 } else { 7692 comm_n = PETSC_COMM_SELF; 7693 } 7694 } else { /* MAT_INITIAL_MATRIX */ 7695 PetscMPIInt rank; 7696 7697 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7698 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7699 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7700 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7701 comm_n = PetscSubcommChild(subcomm); 7702 } 7703 /* flag to destroy *mat_n if not significative */ 7704 if (color) destroy_mat = PETSC_TRUE; 7705 } else { 7706 comm_n = comm; 7707 } 7708 7709 /* prepare send/receive buffers */ 7710 ierr = PetscMalloc1(size,&ilengths_idxs);CHKERRQ(ierr); 7711 ierr = PetscArrayzero(ilengths_idxs,size);CHKERRQ(ierr); 7712 ierr = PetscMalloc1(size,&ilengths_vals);CHKERRQ(ierr); 7713 ierr = PetscArrayzero(ilengths_vals,size);CHKERRQ(ierr); 7714 if (nis) { 7715 ierr = PetscCalloc1(size,&ilengths_idxs_is);CHKERRQ(ierr); 7716 } 7717 7718 /* Get data from local matrices */ 7719 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7720 /* TODO: See below some guidelines on how to prepare the local buffers */ 7721 /* 7722 send_buffer_vals should contain the raw values of the local matrix 7723 send_buffer_idxs should contain: 7724 - MatType_PRIVATE type 7725 - PetscInt size_of_l2gmap 7726 - PetscInt global_row_indices[size_of_l2gmap] 7727 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7728 */ 7729 else { 7730 ierr = MatDenseGetArrayRead(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7731 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7732 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7733 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7734 send_buffer_idxs[1] = i; 7735 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7736 ierr = PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i);CHKERRQ(ierr); 7737 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7738 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7739 for (i=0;i<n_sends;i++) { 7740 ilengths_vals[is_indices[i]] = len*len; 7741 ilengths_idxs[is_indices[i]] = len+2; 7742 } 7743 } 7744 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7745 /* additional is (if any) */ 7746 if (nis) { 7747 PetscMPIInt psum; 7748 PetscInt j; 7749 for (j=0,psum=0;j<nis;j++) { 7750 PetscInt plen; 7751 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7752 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7753 psum += len+1; /* indices + lenght */ 7754 } 7755 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7756 for (j=0,psum=0;j<nis;j++) { 7757 PetscInt plen; 7758 const PetscInt *is_array_idxs; 7759 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7760 send_buffer_idxs_is[psum] = plen; 7761 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7762 ierr = PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen);CHKERRQ(ierr); 7763 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7764 psum += plen+1; /* indices + lenght */ 7765 } 7766 for (i=0;i<n_sends;i++) { 7767 ilengths_idxs_is[is_indices[i]] = psum; 7768 } 7769 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7770 } 7771 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7772 7773 buf_size_idxs = 0; 7774 buf_size_vals = 0; 7775 buf_size_idxs_is = 0; 7776 buf_size_vecs = 0; 7777 for (i=0;i<n_recvs;i++) { 7778 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7779 buf_size_vals += (PetscInt)olengths_vals[i]; 7780 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7781 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7782 } 7783 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7784 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7785 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7786 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7787 7788 /* get new tags for clean communications */ 7789 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7790 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7791 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7792 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7793 7794 /* allocate for requests */ 7795 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7796 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7797 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7798 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7799 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7800 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7801 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7802 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7803 7804 /* communications */ 7805 ptr_idxs = recv_buffer_idxs; 7806 ptr_vals = recv_buffer_vals; 7807 ptr_idxs_is = recv_buffer_idxs_is; 7808 ptr_vecs = recv_buffer_vecs; 7809 for (i=0;i<n_recvs;i++) { 7810 source_dest = onodes[i]; 7811 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7812 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7813 ptr_idxs += olengths_idxs[i]; 7814 ptr_vals += olengths_vals[i]; 7815 if (nis) { 7816 source_dest = onodes_is[i]; 7817 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7818 ptr_idxs_is += olengths_idxs_is[i]; 7819 } 7820 if (nvecs) { 7821 source_dest = onodes[i]; 7822 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7823 ptr_vecs += olengths_idxs[i]-2; 7824 } 7825 } 7826 for (i=0;i<n_sends;i++) { 7827 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7828 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7829 ierr = MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7830 if (nis) { 7831 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7832 } 7833 if (nvecs) { 7834 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7835 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7836 } 7837 } 7838 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7839 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7840 7841 /* assemble new l2g map */ 7842 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7843 ptr_idxs = recv_buffer_idxs; 7844 new_local_rows = 0; 7845 for (i=0;i<n_recvs;i++) { 7846 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7847 ptr_idxs += olengths_idxs[i]; 7848 } 7849 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7850 ptr_idxs = recv_buffer_idxs; 7851 new_local_rows = 0; 7852 for (i=0;i<n_recvs;i++) { 7853 ierr = PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1));CHKERRQ(ierr); 7854 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7855 ptr_idxs += olengths_idxs[i]; 7856 } 7857 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7858 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7859 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7860 7861 /* infer new local matrix type from received local matrices type */ 7862 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7863 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7864 if (n_recvs) { 7865 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7866 ptr_idxs = recv_buffer_idxs; 7867 for (i=0;i<n_recvs;i++) { 7868 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7869 new_local_type_private = MATAIJ_PRIVATE; 7870 break; 7871 } 7872 ptr_idxs += olengths_idxs[i]; 7873 } 7874 switch (new_local_type_private) { 7875 case MATDENSE_PRIVATE: 7876 new_local_type = MATSEQAIJ; 7877 bs = 1; 7878 break; 7879 case MATAIJ_PRIVATE: 7880 new_local_type = MATSEQAIJ; 7881 bs = 1; 7882 break; 7883 case MATBAIJ_PRIVATE: 7884 new_local_type = MATSEQBAIJ; 7885 break; 7886 case MATSBAIJ_PRIVATE: 7887 new_local_type = MATSEQSBAIJ; 7888 break; 7889 default: 7890 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7891 break; 7892 } 7893 } else { /* by default, new_local_type is seqaij */ 7894 new_local_type = MATSEQAIJ; 7895 bs = 1; 7896 } 7897 7898 /* create MATIS object if needed */ 7899 if (!reuse) { 7900 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7901 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7902 } else { 7903 /* it also destroys the local matrices */ 7904 if (*mat_n) { 7905 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7906 } else { /* this is a fake object */ 7907 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7908 } 7909 } 7910 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7911 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7912 7913 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7914 7915 /* Global to local map of received indices */ 7916 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7917 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7918 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7919 7920 /* restore attributes -> type of incoming data and its size */ 7921 buf_size_idxs = 0; 7922 for (i=0;i<n_recvs;i++) { 7923 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7924 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7925 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7926 } 7927 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7928 7929 /* set preallocation */ 7930 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7931 if (!newisdense) { 7932 PetscInt *new_local_nnz=0; 7933 7934 ptr_idxs = recv_buffer_idxs_local; 7935 if (n_recvs) { 7936 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7937 } 7938 for (i=0;i<n_recvs;i++) { 7939 PetscInt j; 7940 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7941 for (j=0;j<*(ptr_idxs+1);j++) { 7942 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7943 } 7944 } else { 7945 /* TODO */ 7946 } 7947 ptr_idxs += olengths_idxs[i]; 7948 } 7949 if (new_local_nnz) { 7950 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7951 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7952 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7953 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7954 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7955 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7956 } else { 7957 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7958 } 7959 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7960 } else { 7961 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7962 } 7963 7964 /* set values */ 7965 ptr_vals = recv_buffer_vals; 7966 ptr_idxs = recv_buffer_idxs_local; 7967 for (i=0;i<n_recvs;i++) { 7968 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7969 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7970 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7971 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7972 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7973 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7974 } else { 7975 /* TODO */ 7976 } 7977 ptr_idxs += olengths_idxs[i]; 7978 ptr_vals += olengths_vals[i]; 7979 } 7980 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7981 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7982 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7983 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7984 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7985 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7986 7987 #if 0 7988 if (!restrict_comm) { /* check */ 7989 Vec lvec,rvec; 7990 PetscReal infty_error; 7991 7992 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7993 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7994 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7995 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7996 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7997 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7998 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7999 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 8000 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 8001 } 8002 #endif 8003 8004 /* assemble new additional is (if any) */ 8005 if (nis) { 8006 PetscInt **temp_idxs,*count_is,j,psum; 8007 8008 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8009 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 8010 ptr_idxs = recv_buffer_idxs_is; 8011 psum = 0; 8012 for (i=0;i<n_recvs;i++) { 8013 for (j=0;j<nis;j++) { 8014 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8015 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 8016 psum += plen; 8017 ptr_idxs += plen+1; /* shift pointer to received data */ 8018 } 8019 } 8020 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 8021 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 8022 for (i=1;i<nis;i++) { 8023 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 8024 } 8025 ierr = PetscArrayzero(count_is,nis);CHKERRQ(ierr); 8026 ptr_idxs = recv_buffer_idxs_is; 8027 for (i=0;i<n_recvs;i++) { 8028 for (j=0;j<nis;j++) { 8029 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8030 ierr = PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen);CHKERRQ(ierr); 8031 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 8032 ptr_idxs += plen+1; /* shift pointer to received data */ 8033 } 8034 } 8035 for (i=0;i<nis;i++) { 8036 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8037 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr); 8038 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 8039 } 8040 ierr = PetscFree(count_is);CHKERRQ(ierr); 8041 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 8042 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 8043 } 8044 /* free workspace */ 8045 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 8046 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8047 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 8048 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8049 if (isdense) { 8050 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 8051 ierr = MatDenseRestoreArrayRead(local_mat,&send_buffer_vals);CHKERRQ(ierr); 8052 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 8053 } else { 8054 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 8055 } 8056 if (nis) { 8057 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8058 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 8059 } 8060 8061 if (nvecs) { 8062 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8063 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8064 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8065 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 8066 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 8067 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 8068 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 8069 /* set values */ 8070 ptr_vals = recv_buffer_vecs; 8071 ptr_idxs = recv_buffer_idxs_local; 8072 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8073 for (i=0;i<n_recvs;i++) { 8074 PetscInt j; 8075 for (j=0;j<*(ptr_idxs+1);j++) { 8076 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 8077 } 8078 ptr_idxs += olengths_idxs[i]; 8079 ptr_vals += olengths_idxs[i]-2; 8080 } 8081 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8082 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 8083 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 8084 } 8085 8086 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 8087 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 8088 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 8089 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 8090 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 8091 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 8092 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 8093 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 8094 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 8095 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 8096 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 8097 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 8098 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 8099 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 8100 ierr = PetscFree(onodes);CHKERRQ(ierr); 8101 if (nis) { 8102 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 8103 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 8104 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 8105 } 8106 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 8107 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 8108 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 8109 for (i=0;i<nis;i++) { 8110 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8111 } 8112 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8113 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 8114 } 8115 *mat_n = NULL; 8116 } 8117 PetscFunctionReturn(0); 8118 } 8119 8120 /* temporary hack into ksp private data structure */ 8121 #include <petsc/private/kspimpl.h> 8122 8123 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 8124 { 8125 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 8126 PC_IS *pcis = (PC_IS*)pc->data; 8127 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 8128 Mat coarsedivudotp = NULL; 8129 Mat coarseG,t_coarse_mat_is; 8130 MatNullSpace CoarseNullSpace = NULL; 8131 ISLocalToGlobalMapping coarse_islg; 8132 IS coarse_is,*isarray,corners; 8133 PetscInt i,im_active=-1,active_procs=-1; 8134 PetscInt nis,nisdofs,nisneu,nisvert; 8135 PetscInt coarse_eqs_per_proc; 8136 PC pc_temp; 8137 PCType coarse_pc_type; 8138 KSPType coarse_ksp_type; 8139 PetscBool multilevel_requested,multilevel_allowed; 8140 PetscBool coarse_reuse; 8141 PetscInt ncoarse,nedcfield; 8142 PetscBool compute_vecs = PETSC_FALSE; 8143 PetscScalar *array; 8144 MatReuse coarse_mat_reuse; 8145 PetscBool restr, full_restr, have_void; 8146 PetscMPIInt size; 8147 PetscErrorCode ierr; 8148 8149 PetscFunctionBegin; 8150 ierr = PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8151 /* Assign global numbering to coarse dofs */ 8152 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8153 PetscInt ocoarse_size; 8154 compute_vecs = PETSC_TRUE; 8155 8156 pcbddc->new_primal_space = PETSC_TRUE; 8157 ocoarse_size = pcbddc->coarse_size; 8158 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 8159 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 8160 /* see if we can avoid some work */ 8161 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8162 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8163 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8164 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 8165 coarse_reuse = PETSC_FALSE; 8166 } else { /* we can safely reuse already computed coarse matrix */ 8167 coarse_reuse = PETSC_TRUE; 8168 } 8169 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8170 coarse_reuse = PETSC_FALSE; 8171 } 8172 /* reset any subassembling information */ 8173 if (!coarse_reuse || pcbddc->recompute_topography) { 8174 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 8175 } 8176 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8177 coarse_reuse = PETSC_TRUE; 8178 } 8179 if (coarse_reuse && pcbddc->coarse_ksp) { 8180 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 8181 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 8182 coarse_mat_reuse = MAT_REUSE_MATRIX; 8183 } else { 8184 coarse_mat = NULL; 8185 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8186 } 8187 8188 /* creates temporary l2gmap and IS for coarse indexes */ 8189 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 8190 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 8191 8192 /* creates temporary MATIS object for coarse matrix */ 8193 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 8194 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 8195 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 8196 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8197 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8198 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 8199 8200 /* count "active" (i.e. with positive local size) and "void" processes */ 8201 im_active = !!(pcis->n); 8202 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8203 8204 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8205 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8206 /* full_restr : just use the receivers from the subassembling pattern */ 8207 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 8208 coarse_mat_is = NULL; 8209 multilevel_allowed = PETSC_FALSE; 8210 multilevel_requested = PETSC_FALSE; 8211 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 8212 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 8213 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8214 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8215 if (multilevel_requested) { 8216 ncoarse = active_procs/pcbddc->coarsening_ratio; 8217 restr = PETSC_FALSE; 8218 full_restr = PETSC_FALSE; 8219 } else { 8220 ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc); 8221 restr = PETSC_TRUE; 8222 full_restr = PETSC_TRUE; 8223 } 8224 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8225 ncoarse = PetscMax(1,ncoarse); 8226 if (!pcbddc->coarse_subassembling) { 8227 if (pcbddc->coarsening_ratio > 1) { 8228 if (multilevel_requested) { 8229 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 8230 } else { 8231 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 8232 } 8233 } else { 8234 PetscMPIInt rank; 8235 8236 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 8237 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 8238 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 8239 } 8240 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8241 PetscInt psum; 8242 if (pcbddc->coarse_ksp) psum = 1; 8243 else psum = 0; 8244 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8245 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8246 } 8247 /* determine if we can go multilevel */ 8248 if (multilevel_requested) { 8249 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8250 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8251 } 8252 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8253 8254 /* dump subassembling pattern */ 8255 if (pcbddc->dbg_flag && multilevel_allowed) { 8256 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 8257 } 8258 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8259 nedcfield = -1; 8260 corners = NULL; 8261 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8262 PetscInt *tidxs,*tidxs2,nout,tsize,i; 8263 const PetscInt *idxs; 8264 ISLocalToGlobalMapping tmap; 8265 8266 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8267 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 8268 /* allocate space for temporary storage */ 8269 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 8270 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 8271 /* allocate for IS array */ 8272 nisdofs = pcbddc->n_ISForDofsLocal; 8273 if (pcbddc->nedclocal) { 8274 if (pcbddc->nedfield > -1) { 8275 nedcfield = pcbddc->nedfield; 8276 } else { 8277 nedcfield = 0; 8278 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs); 8279 nisdofs = 1; 8280 } 8281 } 8282 nisneu = !!pcbddc->NeumannBoundariesLocal; 8283 nisvert = 0; /* nisvert is not used */ 8284 nis = nisdofs + nisneu + nisvert; 8285 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 8286 /* dofs splitting */ 8287 for (i=0;i<nisdofs;i++) { 8288 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 8289 if (nedcfield != i) { 8290 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 8291 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8292 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8293 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8294 } else { 8295 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 8296 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8297 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8298 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout); 8299 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8300 } 8301 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8302 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 8303 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 8304 } 8305 /* neumann boundaries */ 8306 if (pcbddc->NeumannBoundariesLocal) { 8307 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 8308 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 8309 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8310 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8311 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8312 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8313 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 8314 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 8315 } 8316 /* coordinates */ 8317 if (pcbddc->corner_selected) { 8318 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);CHKERRQ(ierr); 8319 ierr = ISGetLocalSize(corners,&tsize);CHKERRQ(ierr); 8320 ierr = ISGetIndices(corners,&idxs);CHKERRQ(ierr); 8321 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8322 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %D != %D",tsize,nout); 8323 ierr = ISRestoreIndices(corners,&idxs);CHKERRQ(ierr); 8324 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);CHKERRQ(ierr); 8325 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8326 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners);CHKERRQ(ierr); 8327 } 8328 ierr = PetscFree(tidxs);CHKERRQ(ierr); 8329 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 8330 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 8331 } else { 8332 nis = 0; 8333 nisdofs = 0; 8334 nisneu = 0; 8335 nisvert = 0; 8336 isarray = NULL; 8337 } 8338 /* destroy no longer needed map */ 8339 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 8340 8341 /* subassemble */ 8342 if (multilevel_allowed) { 8343 Vec vp[1]; 8344 PetscInt nvecs = 0; 8345 PetscBool reuse,reuser; 8346 8347 if (coarse_mat) reuse = PETSC_TRUE; 8348 else reuse = PETSC_FALSE; 8349 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8350 vp[0] = NULL; 8351 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8352 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 8353 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 8354 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 8355 nvecs = 1; 8356 8357 if (pcbddc->divudotp) { 8358 Mat B,loc_divudotp; 8359 Vec v,p; 8360 IS dummy; 8361 PetscInt np; 8362 8363 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 8364 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 8365 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 8366 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 8367 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 8368 ierr = VecSet(p,1.);CHKERRQ(ierr); 8369 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 8370 ierr = VecDestroy(&p);CHKERRQ(ierr); 8371 ierr = MatDestroy(&B);CHKERRQ(ierr); 8372 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 8373 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 8374 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 8375 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 8376 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 8377 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8378 ierr = VecDestroy(&v);CHKERRQ(ierr); 8379 } 8380 } 8381 if (reuser) { 8382 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8383 } else { 8384 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8385 } 8386 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8387 PetscScalar *arraym; 8388 const PetscScalar *arrayv; 8389 PetscInt nl; 8390 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 8391 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 8392 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8393 ierr = VecGetArrayRead(vp[0],&arrayv);CHKERRQ(ierr); 8394 ierr = PetscArraycpy(arraym,arrayv,nl);CHKERRQ(ierr); 8395 ierr = VecRestoreArrayRead(vp[0],&arrayv);CHKERRQ(ierr); 8396 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8397 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 8398 } else { 8399 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 8400 } 8401 } else { 8402 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 8403 } 8404 if (coarse_mat_is || coarse_mat) { 8405 if (!multilevel_allowed) { 8406 ierr = MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 8407 } else { 8408 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8409 if (coarse_mat_is) { 8410 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8411 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 8412 coarse_mat = coarse_mat_is; 8413 } 8414 } 8415 } 8416 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 8417 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 8418 8419 /* create local to global scatters for coarse problem */ 8420 if (compute_vecs) { 8421 PetscInt lrows; 8422 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 8423 if (coarse_mat) { 8424 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 8425 } else { 8426 lrows = 0; 8427 } 8428 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 8429 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 8430 ierr = VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);CHKERRQ(ierr); 8431 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8432 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8433 } 8434 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 8435 8436 /* set defaults for coarse KSP and PC */ 8437 if (multilevel_allowed) { 8438 coarse_ksp_type = KSPRICHARDSON; 8439 coarse_pc_type = PCBDDC; 8440 } else { 8441 coarse_ksp_type = KSPPREONLY; 8442 coarse_pc_type = PCREDUNDANT; 8443 } 8444 8445 /* print some info if requested */ 8446 if (pcbddc->dbg_flag) { 8447 if (!multilevel_allowed) { 8448 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8449 if (multilevel_requested) { 8450 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8451 } else if (pcbddc->max_levels) { 8452 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);CHKERRQ(ierr); 8453 } 8454 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8455 } 8456 } 8457 8458 /* communicate coarse discrete gradient */ 8459 coarseG = NULL; 8460 if (pcbddc->nedcG && multilevel_allowed) { 8461 MPI_Comm ccomm; 8462 if (coarse_mat) { 8463 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8464 } else { 8465 ccomm = MPI_COMM_NULL; 8466 } 8467 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 8468 } 8469 8470 /* create the coarse KSP object only once with defaults */ 8471 if (coarse_mat) { 8472 PetscBool isredundant,isbddc,force,valid; 8473 PetscViewer dbg_viewer = NULL; 8474 8475 if (pcbddc->dbg_flag) { 8476 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8477 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8478 } 8479 if (!pcbddc->coarse_ksp) { 8480 char prefix[256],str_level[16]; 8481 size_t len; 8482 8483 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 8484 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8485 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 8486 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 8487 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8488 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 8489 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 8490 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8491 /* TODO is this logic correct? should check for coarse_mat type */ 8492 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8493 /* prefix */ 8494 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 8495 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 8496 if (!pcbddc->current_level) { 8497 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));CHKERRQ(ierr); 8498 ierr = PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));CHKERRQ(ierr); 8499 } else { 8500 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 8501 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8502 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8503 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8504 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 8505 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 8506 ierr = PetscStrlcat(prefix,str_level,sizeof(prefix));CHKERRQ(ierr); 8507 } 8508 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 8509 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8510 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8511 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8512 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8513 /* allow user customization */ 8514 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 8515 /* get some info after set from options */ 8516 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8517 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8518 force = PETSC_FALSE; 8519 ierr = PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);CHKERRQ(ierr); 8520 ierr = PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");CHKERRQ(ierr); 8521 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8522 if (multilevel_allowed && !force && !valid) { 8523 isbddc = PETSC_TRUE; 8524 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8525 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8526 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8527 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8528 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8529 ierr = PetscObjectOptionsBegin((PetscObject)pc_temp);CHKERRQ(ierr); 8530 ierr = (*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp);CHKERRQ(ierr); 8531 ierr = PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp);CHKERRQ(ierr); 8532 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8533 pc_temp->setfromoptionscalled++; 8534 } 8535 } 8536 } 8537 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8538 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8539 if (nisdofs) { 8540 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 8541 for (i=0;i<nisdofs;i++) { 8542 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8543 } 8544 } 8545 if (nisneu) { 8546 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 8547 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 8548 } 8549 if (nisvert) { 8550 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 8551 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 8552 } 8553 if (coarseG) { 8554 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 8555 } 8556 8557 /* get some info after set from options */ 8558 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8559 8560 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8561 if (isbddc && !multilevel_allowed) { 8562 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8563 } 8564 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8565 force = PETSC_FALSE; 8566 ierr = PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);CHKERRQ(ierr); 8567 ierr = PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");CHKERRQ(ierr); 8568 if (multilevel_requested && multilevel_allowed && !valid && !force) { 8569 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8570 } 8571 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 8572 if (isredundant) { 8573 KSP inner_ksp; 8574 PC inner_pc; 8575 8576 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 8577 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 8578 } 8579 8580 /* parameters which miss an API */ 8581 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8582 if (isbddc) { 8583 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8584 8585 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8586 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8587 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8588 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8589 if (pcbddc_coarse->benign_saddle_point) { 8590 Mat coarsedivudotp_is; 8591 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8592 IS row,col; 8593 const PetscInt *gidxs; 8594 PetscInt n,st,M,N; 8595 8596 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 8597 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 8598 st = st-n; 8599 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 8600 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8601 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8602 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8603 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8604 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8605 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8606 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8607 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8608 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8609 ierr = ISDestroy(&row);CHKERRQ(ierr); 8610 ierr = ISDestroy(&col);CHKERRQ(ierr); 8611 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8612 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8613 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8614 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8615 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8616 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8617 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8618 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8619 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8620 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8621 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8622 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8623 } 8624 } 8625 8626 /* propagate symmetry info of coarse matrix */ 8627 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8628 if (pc->pmat->symmetric_set) { 8629 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8630 } 8631 if (pc->pmat->hermitian_set) { 8632 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8633 } 8634 if (pc->pmat->spd_set) { 8635 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8636 } 8637 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8638 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8639 } 8640 /* set operators */ 8641 ierr = MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");CHKERRQ(ierr); 8642 ierr = MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 8643 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8644 if (pcbddc->dbg_flag) { 8645 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8646 } 8647 } 8648 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8649 ierr = PetscFree(isarray);CHKERRQ(ierr); 8650 #if 0 8651 { 8652 PetscViewer viewer; 8653 char filename[256]; 8654 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8655 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8656 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8657 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8658 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8659 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8660 } 8661 #endif 8662 8663 if (corners) { 8664 Vec gv; 8665 IS is; 8666 const PetscInt *idxs; 8667 PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim; 8668 PetscScalar *coords; 8669 8670 if (!pcbddc->mat_graph->cloc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates"); 8671 ierr = VecGetSize(pcbddc->coarse_vec,&N);CHKERRQ(ierr); 8672 ierr = VecGetLocalSize(pcbddc->coarse_vec,&n);CHKERRQ(ierr); 8673 ierr = VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv);CHKERRQ(ierr); 8674 ierr = VecSetBlockSize(gv,cdim);CHKERRQ(ierr); 8675 ierr = VecSetSizes(gv,n*cdim,N*cdim);CHKERRQ(ierr); 8676 ierr = VecSetType(gv,VECSTANDARD);CHKERRQ(ierr); 8677 ierr = VecSetFromOptions(gv);CHKERRQ(ierr); 8678 ierr = VecSet(gv,PETSC_MAX_REAL);CHKERRQ(ierr); /* we only propagate coordinates from vertices constraints */ 8679 8680 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);CHKERRQ(ierr); 8681 ierr = ISGetLocalSize(is,&n);CHKERRQ(ierr); 8682 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 8683 ierr = PetscMalloc1(n*cdim,&coords);CHKERRQ(ierr); 8684 for (i=0;i<n;i++) { 8685 for (d=0;d<cdim;d++) { 8686 coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d]; 8687 } 8688 } 8689 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 8690 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);CHKERRQ(ierr); 8691 8692 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 8693 ierr = ISGetIndices(corners,&idxs);CHKERRQ(ierr); 8694 ierr = VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES);CHKERRQ(ierr); 8695 ierr = ISRestoreIndices(corners,&idxs);CHKERRQ(ierr); 8696 ierr = PetscFree(coords);CHKERRQ(ierr); 8697 ierr = VecAssemblyBegin(gv);CHKERRQ(ierr); 8698 ierr = VecAssemblyEnd(gv);CHKERRQ(ierr); 8699 ierr = VecGetArray(gv,&coords);CHKERRQ(ierr); 8700 if (pcbddc->coarse_ksp) { 8701 PC coarse_pc; 8702 PetscBool isbddc; 8703 8704 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 8705 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 8706 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8707 PetscReal *realcoords; 8708 8709 ierr = VecGetLocalSize(gv,&n);CHKERRQ(ierr); 8710 #if defined(PETSC_USE_COMPLEX) 8711 ierr = PetscMalloc1(n,&realcoords);CHKERRQ(ierr); 8712 for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]); 8713 #else 8714 realcoords = coords; 8715 #endif 8716 ierr = PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords);CHKERRQ(ierr); 8717 #if defined(PETSC_USE_COMPLEX) 8718 ierr = PetscFree(realcoords);CHKERRQ(ierr); 8719 #endif 8720 } 8721 } 8722 ierr = VecRestoreArray(gv,&coords);CHKERRQ(ierr); 8723 ierr = VecDestroy(&gv);CHKERRQ(ierr); 8724 } 8725 ierr = ISDestroy(&corners);CHKERRQ(ierr); 8726 8727 if (pcbddc->coarse_ksp) { 8728 Vec crhs,csol; 8729 8730 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8731 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8732 if (!csol) { 8733 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8734 } 8735 if (!crhs) { 8736 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8737 } 8738 } 8739 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8740 8741 /* compute null space for coarse solver if the benign trick has been requested */ 8742 if (pcbddc->benign_null) { 8743 8744 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8745 for (i=0;i<pcbddc->benign_n;i++) { 8746 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8747 } 8748 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8749 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8750 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8751 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8752 if (coarse_mat) { 8753 Vec nullv; 8754 PetscScalar *array,*array2; 8755 PetscInt nl; 8756 8757 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8758 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8759 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8760 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8761 ierr = PetscArraycpy(array2,array,nl);CHKERRQ(ierr); 8762 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8763 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8764 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8765 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8766 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8767 } 8768 } 8769 ierr = PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8770 8771 ierr = PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8772 if (pcbddc->coarse_ksp) { 8773 PetscBool ispreonly; 8774 8775 if (CoarseNullSpace) { 8776 PetscBool isnull; 8777 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8778 if (isnull) { 8779 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8780 } 8781 /* TODO: add local nullspaces (if any) */ 8782 } 8783 /* setup coarse ksp */ 8784 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8785 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8786 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8787 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8788 KSP check_ksp; 8789 KSPType check_ksp_type; 8790 PC check_pc; 8791 Vec check_vec,coarse_vec; 8792 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8793 PetscInt its; 8794 PetscBool compute_eigs; 8795 PetscReal *eigs_r,*eigs_c; 8796 PetscInt neigs; 8797 const char *prefix; 8798 8799 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8800 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8801 ierr = PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);CHKERRQ(ierr); 8802 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 8803 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8804 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8805 /* prevent from setup unneeded object */ 8806 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8807 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8808 if (ispreonly) { 8809 check_ksp_type = KSPPREONLY; 8810 compute_eigs = PETSC_FALSE; 8811 } else { 8812 check_ksp_type = KSPGMRES; 8813 compute_eigs = PETSC_TRUE; 8814 } 8815 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8816 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8817 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8818 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8819 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8820 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8821 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8822 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8823 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8824 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8825 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8826 /* create random vec */ 8827 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8828 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8829 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8830 /* solve coarse problem */ 8831 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8832 ierr = KSPCheckSolve(check_ksp,pc,coarse_vec);CHKERRQ(ierr); 8833 /* set eigenvalue estimation if preonly has not been requested */ 8834 if (compute_eigs) { 8835 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8836 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8837 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8838 if (neigs) { 8839 lambda_max = eigs_r[neigs-1]; 8840 lambda_min = eigs_r[0]; 8841 if (pcbddc->use_coarse_estimates) { 8842 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8843 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8844 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8845 } 8846 } 8847 } 8848 } 8849 8850 /* check coarse problem residual error */ 8851 if (pcbddc->dbg_flag) { 8852 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8853 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8854 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8855 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8856 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8857 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8858 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8859 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8860 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8861 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8862 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8863 if (CoarseNullSpace) { 8864 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8865 } 8866 if (compute_eigs) { 8867 PetscReal lambda_max_s,lambda_min_s; 8868 KSPConvergedReason reason; 8869 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8870 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8871 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8872 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8873 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8874 for (i=0;i<neigs;i++) { 8875 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8876 } 8877 } 8878 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8879 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8880 } 8881 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8882 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8883 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8884 if (compute_eigs) { 8885 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8886 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8887 } 8888 } 8889 } 8890 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8891 /* print additional info */ 8892 if (pcbddc->dbg_flag) { 8893 /* waits until all processes reaches this point */ 8894 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8895 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);CHKERRQ(ierr); 8896 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8897 } 8898 8899 /* free memory */ 8900 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8901 ierr = PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8902 PetscFunctionReturn(0); 8903 } 8904 8905 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8906 { 8907 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8908 PC_IS* pcis = (PC_IS*)pc->data; 8909 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8910 IS subset,subset_mult,subset_n; 8911 PetscInt local_size,coarse_size=0; 8912 PetscInt *local_primal_indices=NULL; 8913 const PetscInt *t_local_primal_indices; 8914 PetscErrorCode ierr; 8915 8916 PetscFunctionBegin; 8917 /* Compute global number of coarse dofs */ 8918 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8919 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8920 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8921 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8922 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8923 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8924 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8925 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8926 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8927 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8928 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8929 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8930 ierr = PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size);CHKERRQ(ierr); 8931 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8932 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8933 8934 /* check numbering */ 8935 if (pcbddc->dbg_flag) { 8936 PetscScalar coarsesum,*array,*array2; 8937 PetscInt i; 8938 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8939 8940 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8941 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8942 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8943 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8944 /* counter */ 8945 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8946 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8947 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8948 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8949 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8950 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8951 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8952 for (i=0;i<pcbddc->local_primal_size;i++) { 8953 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8954 } 8955 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8956 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8957 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8958 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8959 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8960 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8961 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8962 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8963 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8964 for (i=0;i<pcis->n;i++) { 8965 if (array[i] != 0.0 && array[i] != array2[i]) { 8966 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8967 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8968 set_error = PETSC_TRUE; 8969 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8970 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8971 } 8972 } 8973 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8974 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8975 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8976 for (i=0;i<pcis->n;i++) { 8977 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8978 } 8979 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8980 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8981 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8982 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8983 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8984 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8985 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8986 PetscInt *gidxs; 8987 8988 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8989 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8990 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8991 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8992 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8993 for (i=0;i<pcbddc->local_primal_size;i++) { 8994 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8995 } 8996 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8997 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8998 } 8999 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 9000 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9001 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 9002 } 9003 9004 /* get back data */ 9005 *coarse_size_n = coarse_size; 9006 *local_primal_indices_n = local_primal_indices; 9007 PetscFunctionReturn(0); 9008 } 9009 9010 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 9011 { 9012 IS localis_t; 9013 PetscInt i,lsize,*idxs,n; 9014 PetscScalar *vals; 9015 PetscErrorCode ierr; 9016 9017 PetscFunctionBegin; 9018 /* get indices in local ordering exploiting local to global map */ 9019 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 9020 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 9021 for (i=0;i<lsize;i++) vals[i] = 1.0; 9022 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 9023 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 9024 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 9025 if (idxs) { /* multilevel guard */ 9026 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 9027 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 9028 } 9029 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 9030 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 9031 ierr = PetscFree(vals);CHKERRQ(ierr); 9032 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 9033 /* now compute set in local ordering */ 9034 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9035 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9036 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 9037 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 9038 for (i=0,lsize=0;i<n;i++) { 9039 if (PetscRealPart(vals[i]) > 0.5) { 9040 lsize++; 9041 } 9042 } 9043 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 9044 for (i=0,lsize=0;i<n;i++) { 9045 if (PetscRealPart(vals[i]) > 0.5) { 9046 idxs[lsize++] = i; 9047 } 9048 } 9049 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 9050 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 9051 *localis = localis_t; 9052 PetscFunctionReturn(0); 9053 } 9054 9055 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9056 { 9057 PC_IS *pcis=(PC_IS*)pc->data; 9058 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9059 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 9060 Mat S_j; 9061 PetscInt *used_xadj,*used_adjncy; 9062 PetscBool free_used_adj; 9063 PetscErrorCode ierr; 9064 9065 PetscFunctionBegin; 9066 ierr = PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 9067 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9068 free_used_adj = PETSC_FALSE; 9069 if (pcbddc->sub_schurs_layers == -1) { 9070 used_xadj = NULL; 9071 used_adjncy = NULL; 9072 } else { 9073 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9074 used_xadj = pcbddc->mat_graph->xadj; 9075 used_adjncy = pcbddc->mat_graph->adjncy; 9076 } else if (pcbddc->computed_rowadj) { 9077 used_xadj = pcbddc->mat_graph->xadj; 9078 used_adjncy = pcbddc->mat_graph->adjncy; 9079 } else { 9080 PetscBool flg_row=PETSC_FALSE; 9081 const PetscInt *xadj,*adjncy; 9082 PetscInt nvtxs; 9083 9084 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 9085 if (flg_row) { 9086 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 9087 ierr = PetscArraycpy(used_xadj,xadj,nvtxs+1);CHKERRQ(ierr); 9088 ierr = PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs]);CHKERRQ(ierr); 9089 free_used_adj = PETSC_TRUE; 9090 } else { 9091 pcbddc->sub_schurs_layers = -1; 9092 used_xadj = NULL; 9093 used_adjncy = NULL; 9094 } 9095 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 9096 } 9097 } 9098 9099 /* setup sub_schurs data */ 9100 ierr = MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 9101 if (!sub_schurs->schur_explicit) { 9102 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9103 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 9104 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 9105 } else { 9106 Mat change = NULL; 9107 Vec scaling = NULL; 9108 IS change_primal = NULL, iP; 9109 PetscInt benign_n; 9110 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9111 PetscBool need_change = PETSC_FALSE; 9112 PetscBool discrete_harmonic = PETSC_FALSE; 9113 9114 if (!pcbddc->use_vertices && reuse_solvers) { 9115 PetscInt n_vertices; 9116 9117 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 9118 reuse_solvers = (PetscBool)!n_vertices; 9119 } 9120 if (!pcbddc->benign_change_explicit) { 9121 benign_n = pcbddc->benign_n; 9122 } else { 9123 benign_n = 0; 9124 } 9125 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9126 We need a global reduction to avoid possible deadlocks. 9127 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9128 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9129 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9130 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 9131 need_change = (PetscBool)(!need_change); 9132 } 9133 /* If the user defines additional constraints, we import them here. 9134 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 9135 if (need_change) { 9136 PC_IS *pcisf; 9137 PC_BDDC *pcbddcf; 9138 PC pcf; 9139 9140 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 9141 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 9142 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 9143 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 9144 9145 /* hacks */ 9146 pcisf = (PC_IS*)pcf->data; 9147 pcisf->is_B_local = pcis->is_B_local; 9148 pcisf->vec1_N = pcis->vec1_N; 9149 pcisf->BtoNmap = pcis->BtoNmap; 9150 pcisf->n = pcis->n; 9151 pcisf->n_B = pcis->n_B; 9152 pcbddcf = (PC_BDDC*)pcf->data; 9153 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 9154 pcbddcf->mat_graph = pcbddc->mat_graph; 9155 pcbddcf->use_faces = PETSC_TRUE; 9156 pcbddcf->use_change_of_basis = PETSC_TRUE; 9157 pcbddcf->use_change_on_faces = PETSC_TRUE; 9158 pcbddcf->use_qr_single = PETSC_TRUE; 9159 pcbddcf->fake_change = PETSC_TRUE; 9160 9161 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 9162 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 9163 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 9164 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 9165 change = pcbddcf->ConstraintMatrix; 9166 pcbddcf->ConstraintMatrix = NULL; 9167 9168 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 9169 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 9170 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 9171 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 9172 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 9173 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 9174 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 9175 pcf->ops->destroy = NULL; 9176 pcf->ops->reset = NULL; 9177 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 9178 } 9179 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9180 9181 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 9182 if (iP) { 9183 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 9184 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 9185 ierr = PetscOptionsEnd();CHKERRQ(ierr); 9186 } 9187 if (discrete_harmonic) { 9188 Mat A; 9189 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 9190 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 9191 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 9192 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 9193 ierr = MatDestroy(&A);CHKERRQ(ierr); 9194 } else { 9195 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 9196 } 9197 ierr = MatDestroy(&change);CHKERRQ(ierr); 9198 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 9199 } 9200 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 9201 9202 /* free adjacency */ 9203 if (free_used_adj) { 9204 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 9205 } 9206 ierr = PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 9207 PetscFunctionReturn(0); 9208 } 9209 9210 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9211 { 9212 PC_IS *pcis=(PC_IS*)pc->data; 9213 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9214 PCBDDCGraph graph; 9215 PetscErrorCode ierr; 9216 9217 PetscFunctionBegin; 9218 /* attach interface graph for determining subsets */ 9219 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9220 IS verticesIS,verticescomm; 9221 PetscInt vsize,*idxs; 9222 9223 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 9224 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 9225 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 9226 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 9227 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 9228 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 9229 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 9230 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 9231 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 9232 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 9233 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 9234 } else { 9235 graph = pcbddc->mat_graph; 9236 } 9237 /* print some info */ 9238 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9239 IS vertices; 9240 PetscInt nv,nedges,nfaces; 9241 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 9242 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 9243 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 9244 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9245 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 9246 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);CHKERRQ(ierr); 9247 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);CHKERRQ(ierr); 9248 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);CHKERRQ(ierr); 9249 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 9250 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9251 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 9252 } 9253 9254 /* sub_schurs init */ 9255 if (!pcbddc->sub_schurs) { 9256 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 9257 } 9258 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 9259 9260 /* free graph struct */ 9261 if (pcbddc->sub_schurs_rebuild) { 9262 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 9263 } 9264 PetscFunctionReturn(0); 9265 } 9266 9267 PetscErrorCode PCBDDCCheckOperator(PC pc) 9268 { 9269 PC_IS *pcis=(PC_IS*)pc->data; 9270 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9271 PetscErrorCode ierr; 9272 9273 PetscFunctionBegin; 9274 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 9275 IS zerodiag = NULL; 9276 Mat S_j,B0_B=NULL; 9277 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 9278 PetscScalar *p0_check,*array,*array2; 9279 PetscReal norm; 9280 PetscInt i; 9281 9282 /* B0 and B0_B */ 9283 if (zerodiag) { 9284 IS dummy; 9285 9286 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 9287 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 9288 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 9289 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 9290 } 9291 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 9292 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 9293 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 9294 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9295 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9296 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9297 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9298 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 9299 /* S_j */ 9300 ierr = MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 9301 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 9302 9303 /* mimic vector in \widetilde{W}_\Gamma */ 9304 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 9305 /* continuous in primal space */ 9306 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 9307 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9308 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9309 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9310 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 9311 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 9312 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 9313 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9314 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 9315 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 9316 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9317 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9318 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 9319 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 9320 9321 /* assemble rhs for coarse problem */ 9322 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 9323 /* local with Schur */ 9324 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 9325 if (zerodiag) { 9326 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 9327 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 9328 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 9329 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 9330 } 9331 /* sum on primal nodes the local contributions */ 9332 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9333 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9334 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 9335 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 9336 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 9337 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 9338 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 9339 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 9340 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9341 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9342 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9343 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9344 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9345 /* scale primal nodes (BDDC sums contibutions) */ 9346 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 9347 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 9348 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9349 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 9350 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 9351 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9352 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9353 /* global: \widetilde{B0}_B w_\Gamma */ 9354 if (zerodiag) { 9355 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 9356 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 9357 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 9358 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 9359 } 9360 /* BDDC */ 9361 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 9362 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 9363 9364 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 9365 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 9366 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 9367 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);CHKERRQ(ierr); 9368 for (i=0;i<pcbddc->benign_n;i++) { 9369 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));CHKERRQ(ierr); 9370 } 9371 ierr = PetscFree(p0_check);CHKERRQ(ierr); 9372 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 9373 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 9374 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 9375 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 9376 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 9377 } 9378 PetscFunctionReturn(0); 9379 } 9380 9381 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9382 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9383 { 9384 Mat At; 9385 IS rows; 9386 PetscInt rst,ren; 9387 PetscErrorCode ierr; 9388 PetscLayout rmap; 9389 9390 PetscFunctionBegin; 9391 rst = ren = 0; 9392 if (ccomm != MPI_COMM_NULL) { 9393 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 9394 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 9395 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 9396 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 9397 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 9398 } 9399 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 9400 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 9401 ierr = ISDestroy(&rows);CHKERRQ(ierr); 9402 9403 if (ccomm != MPI_COMM_NULL) { 9404 Mat_MPIAIJ *a,*b; 9405 IS from,to; 9406 Vec gvec; 9407 PetscInt lsize; 9408 9409 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 9410 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 9411 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 9412 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 9413 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 9414 a = (Mat_MPIAIJ*)At->data; 9415 b = (Mat_MPIAIJ*)(*B)->data; 9416 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 9417 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 9418 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 9419 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 9420 b->A = a->A; 9421 b->B = a->B; 9422 9423 b->donotstash = a->donotstash; 9424 b->roworiented = a->roworiented; 9425 b->rowindices = 0; 9426 b->rowvalues = 0; 9427 b->getrowactive = PETSC_FALSE; 9428 9429 (*B)->rmap = rmap; 9430 (*B)->factortype = A->factortype; 9431 (*B)->assembled = PETSC_TRUE; 9432 (*B)->insertmode = NOT_SET_VALUES; 9433 (*B)->preallocated = PETSC_TRUE; 9434 9435 if (a->colmap) { 9436 #if defined(PETSC_USE_CTABLE) 9437 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 9438 #else 9439 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 9440 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9441 ierr = PetscArraycpy(b->colmap,a->colmap,At->cmap->N);CHKERRQ(ierr); 9442 #endif 9443 } else b->colmap = 0; 9444 if (a->garray) { 9445 PetscInt len; 9446 len = a->B->cmap->n; 9447 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 9448 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 9449 if (len) { ierr = PetscArraycpy(b->garray,a->garray,len);CHKERRQ(ierr); } 9450 } else b->garray = 0; 9451 9452 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 9453 b->lvec = a->lvec; 9454 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 9455 9456 /* cannot use VecScatterCopy */ 9457 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 9458 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 9459 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 9460 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 9461 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 9462 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 9463 ierr = ISDestroy(&from);CHKERRQ(ierr); 9464 ierr = ISDestroy(&to);CHKERRQ(ierr); 9465 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 9466 } 9467 ierr = MatDestroy(&At);CHKERRQ(ierr); 9468 PetscFunctionReturn(0); 9469 } 9470