1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 #if !defined(PETSC_USE_COMPLEX) 18 PetscScalar *uwork,*data,*U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM,bN,lwork,lierr,di = 1; 21 PetscInt ulw,i,nr,nc,n; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 26 if (!nr || !nc) PetscFunctionReturn(0); 27 28 /* workspace */ 29 if (!work) { 30 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 31 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 32 } else { 33 ulw = lw; 34 uwork = work; 35 } 36 n = PetscMin(nr,nc); 37 if (!rwork) { 38 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 39 } else { 40 sing = rwork; 41 } 42 43 /* SVD */ 44 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 48 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 49 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 50 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 51 ierr = PetscFPTrapPop();CHKERRQ(ierr); 52 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 53 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 54 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 55 if (!rwork) { 56 ierr = PetscFree(sing);CHKERRQ(ierr); 57 } 58 if (!work) { 59 ierr = PetscFree(uwork);CHKERRQ(ierr); 60 } 61 /* create B */ 62 if (!range) { 63 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 64 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 65 ierr = PetscArraycpy(data,U+nr*i,(nr-i)*nr);CHKERRQ(ierr); 66 } else { 67 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 68 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 69 ierr = PetscArraycpy(data,U,i*nr);CHKERRQ(ierr); 70 } 71 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscFree(U);CHKERRQ(ierr); 73 #else /* PETSC_USE_COMPLEX */ 74 PetscFunctionBegin; 75 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 76 #endif 77 PetscFunctionReturn(0); 78 } 79 80 /* TODO REMOVE */ 81 #if defined(PRINT_GDET) 82 static int inc = 0; 83 static int lev = 0; 84 #endif 85 86 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 87 { 88 PetscErrorCode ierr; 89 Mat GE,GEd; 90 PetscInt rsize,csize,esize; 91 PetscScalar *ptr; 92 93 PetscFunctionBegin; 94 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 95 if (!esize) PetscFunctionReturn(0); 96 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 97 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 98 99 /* gradients */ 100 ptr = work + 5*esize; 101 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 102 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 103 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 104 ierr = MatDestroy(&GE);CHKERRQ(ierr); 105 106 /* constants */ 107 ptr += rsize*csize; 108 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 109 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 110 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 111 ierr = MatDestroy(&GE);CHKERRQ(ierr); 112 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 113 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 114 115 if (corners) { 116 Mat GEc; 117 const PetscScalar *vals; 118 PetscScalar v; 119 120 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 121 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 122 ierr = MatDenseGetArrayRead(GEd,&vals);CHKERRQ(ierr); 123 /* v = PetscAbsScalar(vals[0]) */; 124 v = 1.; 125 cvals[0] = vals[0]/v; 126 cvals[1] = vals[1]/v; 127 ierr = MatDenseRestoreArrayRead(GEd,&vals);CHKERRQ(ierr); 128 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 129 #if defined(PRINT_GDET) 130 { 131 PetscViewer viewer; 132 char filename[256]; 133 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 134 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 135 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 136 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 137 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 138 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 139 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 140 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 141 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 142 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 143 } 144 #endif 145 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 146 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 147 } 148 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode PCBDDCNedelecSupport(PC pc) 153 { 154 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 155 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 156 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 157 Vec tvec; 158 PetscSF sfv; 159 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 160 MPI_Comm comm; 161 IS lned,primals,allprimals,nedfieldlocal; 162 IS *eedges,*extrows,*extcols,*alleedges; 163 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 164 PetscScalar *vals,*work; 165 PetscReal *rwork; 166 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 167 PetscInt ne,nv,Lv,order,n,field; 168 PetscInt n_neigh,*neigh,*n_shared,**shared; 169 PetscInt i,j,extmem,cum,maxsize,nee; 170 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 171 PetscInt *sfvleaves,*sfvroots; 172 PetscInt *corners,*cedges; 173 PetscInt *ecount,**eneighs,*vcount,**vneighs; 174 PetscInt *emarks; 175 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 176 PetscErrorCode ierr; 177 178 PetscFunctionBegin; 179 /* If the discrete gradient is defined for a subset of dofs and global is true, 180 it assumes G is given in global ordering for all the dofs. 181 Otherwise, the ordering is global for the Nedelec field */ 182 order = pcbddc->nedorder; 183 conforming = pcbddc->conforming; 184 field = pcbddc->nedfield; 185 global = pcbddc->nedglobal; 186 setprimal = PETSC_FALSE; 187 print = PETSC_FALSE; 188 singular = PETSC_FALSE; 189 190 /* Command line customization */ 191 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 192 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 193 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 194 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 195 /* print debug info TODO: to be removed */ 196 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 197 ierr = PetscOptionsEnd();CHKERRQ(ierr); 198 199 /* Return if there are no edges in the decomposition and the problem is not singular */ 200 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 201 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 202 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 203 if (!singular) { 204 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 205 lrc[0] = PETSC_FALSE; 206 for (i=0;i<n;i++) { 207 if (PetscRealPart(vals[i]) > 2.) { 208 lrc[0] = PETSC_TRUE; 209 break; 210 } 211 } 212 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 213 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 214 if (!lrc[1]) PetscFunctionReturn(0); 215 } 216 217 /* Get Nedelec field */ 218 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal); 219 if (pcbddc->n_ISForDofsLocal && field >= 0) { 220 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 221 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 222 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 223 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 224 ne = n; 225 nedfieldlocal = NULL; 226 global = PETSC_TRUE; 227 } else if (field == PETSC_DECIDE) { 228 PetscInt rst,ren,*idx; 229 230 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 231 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 232 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 233 for (i=rst;i<ren;i++) { 234 PetscInt nc; 235 236 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 237 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 238 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 239 } 240 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 241 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 242 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 243 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 244 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 245 } else { 246 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 247 } 248 249 /* Sanity checks */ 250 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 251 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 252 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order); 253 254 /* Just set primal dofs and return */ 255 if (setprimal) { 256 IS enedfieldlocal; 257 PetscInt *eidxs; 258 259 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 260 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 261 if (nedfieldlocal) { 262 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 263 for (i=0,cum=0;i<ne;i++) { 264 if (PetscRealPart(vals[idxs[i]]) > 2.) { 265 eidxs[cum++] = idxs[i]; 266 } 267 } 268 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 269 } else { 270 for (i=0,cum=0;i<ne;i++) { 271 if (PetscRealPart(vals[i]) > 2.) { 272 eidxs[cum++] = i; 273 } 274 } 275 } 276 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 277 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 278 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 279 ierr = PetscFree(eidxs);CHKERRQ(ierr); 280 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 281 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 282 PetscFunctionReturn(0); 283 } 284 285 /* Compute some l2g maps */ 286 if (nedfieldlocal) { 287 IS is; 288 289 /* need to map from the local Nedelec field to local numbering */ 290 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 291 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 292 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 293 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 294 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 295 if (global) { 296 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 297 el2g = al2g; 298 } else { 299 IS gis; 300 301 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 302 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 303 ierr = ISDestroy(&gis);CHKERRQ(ierr); 304 } 305 ierr = ISDestroy(&is);CHKERRQ(ierr); 306 } else { 307 /* restore default */ 308 pcbddc->nedfield = -1; 309 /* one ref for the destruction of al2g, one for el2g */ 310 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 311 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 312 el2g = al2g; 313 fl2g = NULL; 314 } 315 316 /* Start communication to drop connections for interior edges (for cc analysis only) */ 317 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 318 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 319 if (nedfieldlocal) { 320 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 321 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 322 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 323 } else { 324 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 325 } 326 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 327 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 328 329 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 330 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 331 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 332 if (global) { 333 PetscInt rst; 334 335 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 336 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 337 if (matis->sf_rootdata[i] < 2) { 338 matis->sf_rootdata[cum++] = i + rst; 339 } 340 } 341 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 342 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 343 } else { 344 PetscInt *tbz; 345 346 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 347 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 348 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 349 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 350 for (i=0,cum=0;i<ne;i++) 351 if (matis->sf_leafdata[idxs[i]] == 1) 352 tbz[cum++] = i; 353 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 354 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 355 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 356 ierr = PetscFree(tbz);CHKERRQ(ierr); 357 } 358 } else { /* we need the entire G to infer the nullspace */ 359 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 360 G = pcbddc->discretegradient; 361 } 362 363 /* Extract subdomain relevant rows of G */ 364 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 365 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 366 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 367 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 368 ierr = ISDestroy(&lned);CHKERRQ(ierr); 369 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 370 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 371 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 372 373 /* SF for nodal dofs communications */ 374 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 375 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 376 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 377 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 378 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 379 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 380 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 381 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 382 i = singular ? 2 : 1; 383 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 384 385 /* Destroy temporary G created in MATIS format and modified G */ 386 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 387 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 388 ierr = MatDestroy(&G);CHKERRQ(ierr); 389 390 if (print) { 391 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 392 ierr = MatView(lG,NULL);CHKERRQ(ierr); 393 } 394 395 /* Save lG for values insertion in change of basis */ 396 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 397 398 /* Analyze the edge-nodes connections (duplicate lG) */ 399 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 400 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 401 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 402 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 403 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 404 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 405 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 406 /* need to import the boundary specification to ensure the 407 proper detection of coarse edges' endpoints */ 408 if (pcbddc->DirichletBoundariesLocal) { 409 IS is; 410 411 if (fl2g) { 412 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 413 } else { 414 is = pcbddc->DirichletBoundariesLocal; 415 } 416 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 417 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 418 for (i=0;i<cum;i++) { 419 if (idxs[i] >= 0) { 420 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 421 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 422 } 423 } 424 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 425 if (fl2g) { 426 ierr = ISDestroy(&is);CHKERRQ(ierr); 427 } 428 } 429 if (pcbddc->NeumannBoundariesLocal) { 430 IS is; 431 432 if (fl2g) { 433 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 434 } else { 435 is = pcbddc->NeumannBoundariesLocal; 436 } 437 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 438 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 439 for (i=0;i<cum;i++) { 440 if (idxs[i] >= 0) { 441 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 442 } 443 } 444 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 445 if (fl2g) { 446 ierr = ISDestroy(&is);CHKERRQ(ierr); 447 } 448 } 449 450 /* Count neighs per dof */ 451 ierr = ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 452 ierr = ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 453 454 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 455 for proper detection of coarse edges' endpoints */ 456 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 457 for (i=0;i<ne;i++) { 458 if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) { 459 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 460 } 461 } 462 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 463 if (!conforming) { 464 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 465 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 466 } 467 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 468 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 469 cum = 0; 470 for (i=0;i<ne;i++) { 471 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 472 if (!PetscBTLookup(btee,i)) { 473 marks[cum++] = i; 474 continue; 475 } 476 /* set badly connected edge dofs as primal */ 477 if (!conforming) { 478 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 479 marks[cum++] = i; 480 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 481 for (j=ii[i];j<ii[i+1];j++) { 482 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 483 } 484 } else { 485 /* every edge dofs should be connected trough a certain number of nodal dofs 486 to other edge dofs belonging to coarse edges 487 - at most 2 endpoints 488 - order-1 interior nodal dofs 489 - no undefined nodal dofs (nconn < order) 490 */ 491 PetscInt ends = 0,ints = 0, undef = 0; 492 for (j=ii[i];j<ii[i+1];j++) { 493 PetscInt v = jj[j],k; 494 PetscInt nconn = iit[v+1]-iit[v]; 495 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 496 if (nconn > order) ends++; 497 else if (nconn == order) ints++; 498 else undef++; 499 } 500 if (undef || ends > 2 || ints != order -1) { 501 marks[cum++] = i; 502 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 503 for (j=ii[i];j<ii[i+1];j++) { 504 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 505 } 506 } 507 } 508 } 509 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 510 if (!order && ii[i+1] != ii[i]) { 511 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 512 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 513 } 514 } 515 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 516 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 517 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 518 if (!conforming) { 519 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 520 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 521 } 522 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 523 524 /* identify splitpoints and corner candidates */ 525 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 526 if (print) { 527 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 528 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 529 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 530 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 531 } 532 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 533 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 534 for (i=0;i<nv;i++) { 535 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 536 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 537 if (!order) { /* variable order */ 538 PetscReal vorder = 0.; 539 540 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 541 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 542 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test); 543 ord = 1; 544 } 545 if (PetscUnlikelyDebug(test%ord)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord); 546 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 547 if (PetscBTLookup(btbd,jj[j])) { 548 bdir = PETSC_TRUE; 549 break; 550 } 551 if (vc != ecount[jj[j]]) { 552 sneighs = PETSC_FALSE; 553 } else { 554 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 555 for (k=0;k<vc;k++) { 556 if (vn[k] != en[k]) { 557 sneighs = PETSC_FALSE; 558 break; 559 } 560 } 561 } 562 } 563 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 564 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir); 565 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 566 } else if (test == ord) { 567 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 568 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i); 569 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 570 } else { 571 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i); 572 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 573 } 574 } 575 } 576 ierr = ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 577 ierr = ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 578 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 579 580 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 581 if (order != 1) { 582 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 583 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 584 for (i=0;i<nv;i++) { 585 if (PetscBTLookup(btvcand,i)) { 586 PetscBool found = PETSC_FALSE; 587 for (j=ii[i];j<ii[i+1] && !found;j++) { 588 PetscInt k,e = jj[j]; 589 if (PetscBTLookup(bte,e)) continue; 590 for (k=iit[e];k<iit[e+1];k++) { 591 PetscInt v = jjt[k]; 592 if (v != i && PetscBTLookup(btvcand,v)) { 593 found = PETSC_TRUE; 594 break; 595 } 596 } 597 } 598 if (!found) { 599 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i); 600 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 601 } else { 602 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i); 603 } 604 } 605 } 606 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 607 } 608 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 609 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 610 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 611 612 /* Get the local G^T explicitly */ 613 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 614 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 615 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 616 617 /* Mark interior nodal dofs */ 618 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 619 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 620 for (i=1;i<n_neigh;i++) { 621 for (j=0;j<n_shared[i];j++) { 622 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 623 } 624 } 625 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 626 627 /* communicate corners and splitpoints */ 628 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 629 ierr = PetscArrayzero(sfvleaves,nv);CHKERRQ(ierr); 630 ierr = PetscArrayzero(sfvroots,Lv);CHKERRQ(ierr); 631 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 632 633 if (print) { 634 IS tbz; 635 636 cum = 0; 637 for (i=0;i<nv;i++) 638 if (sfvleaves[i]) 639 vmarks[cum++] = i; 640 641 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 642 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 643 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 644 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 645 } 646 647 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 648 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 649 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 650 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 651 652 /* Zero rows of lGt corresponding to identified corners 653 and interior nodal dofs */ 654 cum = 0; 655 for (i=0;i<nv;i++) { 656 if (sfvleaves[i]) { 657 vmarks[cum++] = i; 658 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 659 } 660 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 661 } 662 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 663 if (print) { 664 IS tbz; 665 666 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 667 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 668 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 669 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 670 } 671 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 672 ierr = PetscFree(vmarks);CHKERRQ(ierr); 673 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 674 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 675 676 /* Recompute G */ 677 ierr = MatDestroy(&lG);CHKERRQ(ierr); 678 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 679 if (print) { 680 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 681 ierr = MatView(lG,NULL);CHKERRQ(ierr); 682 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 683 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 684 } 685 686 /* Get primal dofs (if any) */ 687 cum = 0; 688 for (i=0;i<ne;i++) { 689 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 690 } 691 if (fl2g) { 692 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 693 } 694 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 695 if (print) { 696 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 697 ierr = ISView(primals,NULL);CHKERRQ(ierr); 698 } 699 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 700 /* TODO: what if the user passed in some of them ? */ 701 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 702 ierr = ISDestroy(&primals);CHKERRQ(ierr); 703 704 /* Compute edge connectivity */ 705 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 706 707 /* Symbolic conn = lG*lGt */ 708 ierr = MatProductCreate(lG,lGt,NULL,&conn);CHKERRQ(ierr); 709 ierr = MatProductSetType(conn,MATPRODUCT_AB);CHKERRQ(ierr); 710 ierr = MatProductSetAlgorithm(conn,"default");CHKERRQ(ierr); 711 ierr = MatProductSetFill(conn,PETSC_DEFAULT);CHKERRQ(ierr); 712 ierr = PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_");CHKERRQ(ierr); 713 ierr = MatProductSetFromOptions(conn);CHKERRQ(ierr); 714 ierr = MatProductSymbolic(conn);CHKERRQ(ierr); 715 716 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 717 if (fl2g) { 718 PetscBT btf; 719 PetscInt *iia,*jja,*iiu,*jju; 720 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 721 722 /* create CSR for all local dofs */ 723 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 724 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 725 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n); 726 iiu = pcbddc->mat_graph->xadj; 727 jju = pcbddc->mat_graph->adjncy; 728 } else if (pcbddc->use_local_adj) { 729 rest = PETSC_TRUE; 730 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 731 } else { 732 free = PETSC_TRUE; 733 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 734 iiu[0] = 0; 735 for (i=0;i<n;i++) { 736 iiu[i+1] = i+1; 737 jju[i] = -1; 738 } 739 } 740 741 /* import sizes of CSR */ 742 iia[0] = 0; 743 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 744 745 /* overwrite entries corresponding to the Nedelec field */ 746 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 747 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 748 for (i=0;i<ne;i++) { 749 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 750 iia[idxs[i]+1] = ii[i+1]-ii[i]; 751 } 752 753 /* iia in CSR */ 754 for (i=0;i<n;i++) iia[i+1] += iia[i]; 755 756 /* jja in CSR */ 757 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 758 for (i=0;i<n;i++) 759 if (!PetscBTLookup(btf,i)) 760 for (j=0;j<iiu[i+1]-iiu[i];j++) 761 jja[iia[i]+j] = jju[iiu[i]+j]; 762 763 /* map edge dofs connectivity */ 764 if (jj) { 765 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 766 for (i=0;i<ne;i++) { 767 PetscInt e = idxs[i]; 768 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 769 } 770 } 771 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 772 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 773 if (rest) { 774 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 775 } 776 if (free) { 777 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 778 } 779 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 780 } else { 781 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 782 } 783 784 /* Analyze interface for edge dofs */ 785 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 786 pcbddc->mat_graph->twodim = PETSC_FALSE; 787 788 /* Get coarse edges in the edge space */ 789 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 790 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 791 792 if (fl2g) { 793 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 794 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 795 for (i=0;i<nee;i++) { 796 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 797 } 798 } else { 799 eedges = alleedges; 800 primals = allprimals; 801 } 802 803 /* Mark fine edge dofs with their coarse edge id */ 804 ierr = PetscArrayzero(marks,ne);CHKERRQ(ierr); 805 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 806 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 807 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 808 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 809 if (print) { 810 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 811 ierr = ISView(primals,NULL);CHKERRQ(ierr); 812 } 813 814 maxsize = 0; 815 for (i=0;i<nee;i++) { 816 PetscInt size,mark = i+1; 817 818 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 819 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 820 for (j=0;j<size;j++) marks[idxs[j]] = mark; 821 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 822 maxsize = PetscMax(maxsize,size); 823 } 824 825 /* Find coarse edge endpoints */ 826 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 827 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 828 for (i=0;i<nee;i++) { 829 PetscInt mark = i+1,size; 830 831 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 832 if (!size && nedfieldlocal) continue; 833 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 834 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 835 if (print) { 836 ierr = PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);CHKERRQ(ierr); 837 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 838 } 839 for (j=0;j<size;j++) { 840 PetscInt k, ee = idxs[j]; 841 if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee); 842 for (k=ii[ee];k<ii[ee+1];k++) { 843 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]); 844 if (PetscBTLookup(btv,jj[k])) { 845 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]); 846 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 847 PetscInt k2; 848 PetscBool corner = PETSC_FALSE; 849 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 850 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 851 /* it's a corner if either is connected with an edge dof belonging to a different cc or 852 if the edge dof lie on the natural part of the boundary */ 853 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 854 corner = PETSC_TRUE; 855 break; 856 } 857 } 858 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 859 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]); 860 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 861 } else { 862 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 863 } 864 } 865 } 866 } 867 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 868 } 869 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 870 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 871 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 872 873 /* Reset marked primal dofs */ 874 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 875 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 876 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 877 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 878 879 /* Now use the initial lG */ 880 ierr = MatDestroy(&lG);CHKERRQ(ierr); 881 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 882 lG = lGinit; 883 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 884 885 /* Compute extended cols indices */ 886 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 887 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 888 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 889 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 890 i *= maxsize; 891 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 892 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 893 eerr = PETSC_FALSE; 894 for (i=0;i<nee;i++) { 895 PetscInt size,found = 0; 896 897 cum = 0; 898 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 899 if (!size && nedfieldlocal) continue; 900 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 901 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 902 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 903 for (j=0;j<size;j++) { 904 PetscInt k,ee = idxs[j]; 905 for (k=ii[ee];k<ii[ee+1];k++) { 906 PetscInt vv = jj[k]; 907 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 908 else if (!PetscBTLookupSet(btvc,vv)) found++; 909 } 910 } 911 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 912 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 913 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 914 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 915 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 916 /* it may happen that endpoints are not defined at this point 917 if it is the case, mark this edge for a second pass */ 918 if (cum != size -1 || found != 2) { 919 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 920 if (print) { 921 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 922 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 923 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 924 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 925 } 926 eerr = PETSC_TRUE; 927 } 928 } 929 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 930 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 931 if (done) { 932 PetscInt *newprimals; 933 934 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 935 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 936 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 937 ierr = PetscArraycpy(newprimals,idxs,cum);CHKERRQ(ierr); 938 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 939 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 940 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr); 941 for (i=0;i<nee;i++) { 942 PetscBool has_candidates = PETSC_FALSE; 943 if (PetscBTLookup(bter,i)) { 944 PetscInt size,mark = i+1; 945 946 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 947 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 948 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 949 for (j=0;j<size;j++) { 950 PetscInt k,ee = idxs[j]; 951 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]); 952 for (k=ii[ee];k<ii[ee+1];k++) { 953 /* set all candidates located on the edge as corners */ 954 if (PetscBTLookup(btvcand,jj[k])) { 955 PetscInt k2,vv = jj[k]; 956 has_candidates = PETSC_TRUE; 957 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv); 958 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 959 /* set all edge dofs connected to candidate as primals */ 960 for (k2=iit[vv];k2<iit[vv+1];k2++) { 961 if (marks[jjt[k2]] == mark) { 962 PetscInt k3,ee2 = jjt[k2]; 963 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2); 964 newprimals[cum++] = ee2; 965 /* finally set the new corners */ 966 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 967 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]); 968 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 969 } 970 } 971 } 972 } else { 973 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]); 974 } 975 } 976 } 977 if (!has_candidates) { /* circular edge */ 978 PetscInt k, ee = idxs[0],*tmarks; 979 980 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 981 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i); 982 for (k=ii[ee];k<ii[ee+1];k++) { 983 PetscInt k2; 984 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]); 985 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 986 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 987 } 988 for (j=0;j<size;j++) { 989 if (tmarks[idxs[j]] > 1) { 990 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]); 991 newprimals[cum++] = idxs[j]; 992 } 993 } 994 ierr = PetscFree(tmarks);CHKERRQ(ierr); 995 } 996 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 997 } 998 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 999 } 1000 ierr = PetscFree(extcols);CHKERRQ(ierr); 1001 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1002 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1003 if (fl2g) { 1004 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1005 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1006 for (i=0;i<nee;i++) { 1007 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1008 } 1009 ierr = PetscFree(eedges);CHKERRQ(ierr); 1010 } 1011 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1012 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1013 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1014 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1015 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1016 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1017 pcbddc->mat_graph->twodim = PETSC_FALSE; 1018 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1019 if (fl2g) { 1020 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1021 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1022 for (i=0;i<nee;i++) { 1023 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1024 } 1025 } else { 1026 eedges = alleedges; 1027 primals = allprimals; 1028 } 1029 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1030 1031 /* Mark again */ 1032 ierr = PetscArrayzero(marks,ne);CHKERRQ(ierr); 1033 for (i=0;i<nee;i++) { 1034 PetscInt size,mark = i+1; 1035 1036 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1037 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1038 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1039 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1040 } 1041 if (print) { 1042 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1043 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1044 } 1045 1046 /* Recompute extended cols */ 1047 eerr = PETSC_FALSE; 1048 for (i=0;i<nee;i++) { 1049 PetscInt size; 1050 1051 cum = 0; 1052 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1053 if (!size && nedfieldlocal) continue; 1054 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1055 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1056 for (j=0;j<size;j++) { 1057 PetscInt k,ee = idxs[j]; 1058 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1059 } 1060 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1061 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1062 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1063 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1064 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1065 if (cum != size -1) { 1066 if (print) { 1067 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1068 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1069 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1070 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1071 } 1072 eerr = PETSC_TRUE; 1073 } 1074 } 1075 } 1076 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1077 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1078 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1079 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1080 /* an error should not occur at this point */ 1081 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1082 1083 /* Check the number of endpoints */ 1084 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1085 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1086 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1087 for (i=0;i<nee;i++) { 1088 PetscInt size, found = 0, gc[2]; 1089 1090 /* init with defaults */ 1091 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1092 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1093 if (!size && nedfieldlocal) continue; 1094 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1095 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1096 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1097 for (j=0;j<size;j++) { 1098 PetscInt k,ee = idxs[j]; 1099 for (k=ii[ee];k<ii[ee+1];k++) { 1100 PetscInt vv = jj[k]; 1101 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1102 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i); 1103 corners[i*2+found++] = vv; 1104 } 1105 } 1106 } 1107 if (found != 2) { 1108 PetscInt e; 1109 if (fl2g) { 1110 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1111 } else { 1112 e = idxs[0]; 1113 } 1114 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]); 1115 } 1116 1117 /* get primal dof index on this coarse edge */ 1118 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1119 if (gc[0] > gc[1]) { 1120 PetscInt swap = corners[2*i]; 1121 corners[2*i] = corners[2*i+1]; 1122 corners[2*i+1] = swap; 1123 } 1124 cedges[i] = idxs[size-1]; 1125 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1126 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1127 } 1128 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1129 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1130 1131 if (PetscDefined(USE_DEBUG)) { 1132 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1133 not interfere with neighbouring coarse edges */ 1134 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1135 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1136 for (i=0;i<nv;i++) { 1137 PetscInt emax = 0,eemax = 0; 1138 1139 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1140 ierr = PetscArrayzero(emarks,nee+1);CHKERRQ(ierr); 1141 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1142 for (j=1;j<nee+1;j++) { 1143 if (emax < emarks[j]) { 1144 emax = emarks[j]; 1145 eemax = j; 1146 } 1147 } 1148 /* not relevant for edges */ 1149 if (!eemax) continue; 1150 1151 for (j=ii[i];j<ii[i+1];j++) { 1152 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1153 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]); 1154 } 1155 } 1156 } 1157 ierr = PetscFree(emarks);CHKERRQ(ierr); 1158 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1159 } 1160 1161 /* Compute extended rows indices for edge blocks of the change of basis */ 1162 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1163 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1164 extmem *= maxsize; 1165 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1166 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1167 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1168 for (i=0;i<nv;i++) { 1169 PetscInt mark = 0,size,start; 1170 1171 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1172 for (j=ii[i];j<ii[i+1];j++) 1173 if (marks[jj[j]] && !mark) 1174 mark = marks[jj[j]]; 1175 1176 /* not relevant */ 1177 if (!mark) continue; 1178 1179 /* import extended row */ 1180 mark--; 1181 start = mark*extmem+extrowcum[mark]; 1182 size = ii[i+1]-ii[i]; 1183 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem); 1184 ierr = PetscArraycpy(extrow+start,jj+ii[i],size);CHKERRQ(ierr); 1185 extrowcum[mark] += size; 1186 } 1187 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1188 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1189 ierr = PetscFree(marks);CHKERRQ(ierr); 1190 1191 /* Compress extrows */ 1192 cum = 0; 1193 for (i=0;i<nee;i++) { 1194 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1195 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1196 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1197 cum = PetscMax(cum,size); 1198 } 1199 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1200 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1201 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1202 1203 /* Workspace for lapack inner calls and VecSetValues */ 1204 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1205 1206 /* Create change of basis matrix (preallocation can be improved) */ 1207 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1208 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1209 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1210 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1211 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1212 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1213 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1214 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1215 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1216 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1217 1218 /* Defaults to identity */ 1219 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1220 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1221 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1222 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1223 1224 /* Create discrete gradient for the coarser level if needed */ 1225 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1226 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1227 if (pcbddc->current_level < pcbddc->max_levels) { 1228 ISLocalToGlobalMapping cel2g,cvl2g; 1229 IS wis,gwis; 1230 PetscInt cnv,cne; 1231 1232 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1233 if (fl2g) { 1234 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1235 } else { 1236 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1237 pcbddc->nedclocal = wis; 1238 } 1239 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1240 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1241 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1242 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1243 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1244 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1245 1246 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1247 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1248 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1249 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1250 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1251 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1252 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1253 1254 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1255 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1256 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1257 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1258 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1259 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1260 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1261 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1262 } 1263 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1264 1265 #if defined(PRINT_GDET) 1266 inc = 0; 1267 lev = pcbddc->current_level; 1268 #endif 1269 1270 /* Insert values in the change of basis matrix */ 1271 for (i=0;i<nee;i++) { 1272 Mat Gins = NULL, GKins = NULL; 1273 IS cornersis = NULL; 1274 PetscScalar cvals[2]; 1275 1276 if (pcbddc->nedcG) { 1277 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1278 } 1279 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1280 if (Gins && GKins) { 1281 const PetscScalar *data; 1282 const PetscInt *rows,*cols; 1283 PetscInt nrh,nch,nrc,ncc; 1284 1285 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1286 /* H1 */ 1287 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1288 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1289 ierr = MatDenseGetArrayRead(Gins,&data);CHKERRQ(ierr); 1290 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1291 ierr = MatDenseRestoreArrayRead(Gins,&data);CHKERRQ(ierr); 1292 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1293 /* complement */ 1294 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1295 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i); 1296 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i); 1297 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc); 1298 ierr = MatDenseGetArrayRead(GKins,&data);CHKERRQ(ierr); 1299 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1300 ierr = MatDenseRestoreArrayRead(GKins,&data);CHKERRQ(ierr); 1301 1302 /* coarse discrete gradient */ 1303 if (pcbddc->nedcG) { 1304 PetscInt cols[2]; 1305 1306 cols[0] = 2*i; 1307 cols[1] = 2*i+1; 1308 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1309 } 1310 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1311 } 1312 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1313 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1314 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1315 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1316 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1317 } 1318 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1319 1320 /* Start assembling */ 1321 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1322 if (pcbddc->nedcG) { 1323 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1324 } 1325 1326 /* Free */ 1327 if (fl2g) { 1328 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1329 for (i=0;i<nee;i++) { 1330 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1331 } 1332 ierr = PetscFree(eedges);CHKERRQ(ierr); 1333 } 1334 1335 /* hack mat_graph with primal dofs on the coarse edges */ 1336 { 1337 PCBDDCGraph graph = pcbddc->mat_graph; 1338 PetscInt *oqueue = graph->queue; 1339 PetscInt *ocptr = graph->cptr; 1340 PetscInt ncc,*idxs; 1341 1342 /* find first primal edge */ 1343 if (pcbddc->nedclocal) { 1344 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1345 } else { 1346 if (fl2g) { 1347 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1348 } 1349 idxs = cedges; 1350 } 1351 cum = 0; 1352 while (cum < nee && cedges[cum] < 0) cum++; 1353 1354 /* adapt connected components */ 1355 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1356 graph->cptr[0] = 0; 1357 for (i=0,ncc=0;i<graph->ncc;i++) { 1358 PetscInt lc = ocptr[i+1]-ocptr[i]; 1359 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1360 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1361 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1362 ncc++; 1363 lc--; 1364 cum++; 1365 while (cum < nee && cedges[cum] < 0) cum++; 1366 } 1367 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1368 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1369 ncc++; 1370 } 1371 graph->ncc = ncc; 1372 if (pcbddc->nedclocal) { 1373 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1374 } 1375 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1376 } 1377 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1378 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1379 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1380 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1381 1382 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1383 ierr = PetscFree(extrow);CHKERRQ(ierr); 1384 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1385 ierr = PetscFree(corners);CHKERRQ(ierr); 1386 ierr = PetscFree(cedges);CHKERRQ(ierr); 1387 ierr = PetscFree(extrows);CHKERRQ(ierr); 1388 ierr = PetscFree(extcols);CHKERRQ(ierr); 1389 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1390 1391 /* Complete assembling */ 1392 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1393 if (pcbddc->nedcG) { 1394 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1395 #if 0 1396 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1397 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1398 #endif 1399 } 1400 1401 /* set change of basis */ 1402 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1403 ierr = MatDestroy(&T);CHKERRQ(ierr); 1404 1405 PetscFunctionReturn(0); 1406 } 1407 1408 /* the near-null space of BDDC carries information on quadrature weights, 1409 and these can be collinear -> so cheat with MatNullSpaceCreate 1410 and create a suitable set of basis vectors first */ 1411 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1412 { 1413 PetscErrorCode ierr; 1414 PetscInt i; 1415 1416 PetscFunctionBegin; 1417 for (i=0;i<nvecs;i++) { 1418 PetscInt first,last; 1419 1420 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1421 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1422 if (i>=first && i < last) { 1423 PetscScalar *data; 1424 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1425 if (!has_const) { 1426 data[i-first] = 1.; 1427 } else { 1428 data[2*i-first] = 1./PetscSqrtReal(2.); 1429 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1430 } 1431 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1432 } 1433 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1434 } 1435 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1436 for (i=0;i<nvecs;i++) { /* reset vectors */ 1437 PetscInt first,last; 1438 ierr = VecLockReadPop(quad_vecs[i]);CHKERRQ(ierr); 1439 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1440 if (i>=first && i < last) { 1441 PetscScalar *data; 1442 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1443 if (!has_const) { 1444 data[i-first] = 0.; 1445 } else { 1446 data[2*i-first] = 0.; 1447 data[2*i-first+1] = 0.; 1448 } 1449 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1450 } 1451 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1452 ierr = VecLockReadPush(quad_vecs[i]);CHKERRQ(ierr); 1453 } 1454 PetscFunctionReturn(0); 1455 } 1456 1457 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1458 { 1459 Mat loc_divudotp; 1460 Vec p,v,vins,quad_vec,*quad_vecs; 1461 ISLocalToGlobalMapping map; 1462 PetscScalar *vals; 1463 const PetscScalar *array; 1464 PetscInt i,maxneighs,maxsize,*gidxs; 1465 PetscInt n_neigh,*neigh,*n_shared,**shared; 1466 PetscMPIInt rank; 1467 PetscErrorCode ierr; 1468 1469 PetscFunctionBegin; 1470 ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1471 ierr = MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1472 if (!maxneighs) { 1473 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1474 *nnsp = NULL; 1475 PetscFunctionReturn(0); 1476 } 1477 maxsize = 0; 1478 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1479 ierr = PetscMalloc2(maxsize,&gidxs,maxsize,&vals);CHKERRQ(ierr); 1480 /* create vectors to hold quadrature weights */ 1481 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1482 if (!transpose) { 1483 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1484 } else { 1485 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1486 } 1487 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1488 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1489 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1490 for (i=0;i<maxneighs;i++) { 1491 ierr = VecLockReadPop(quad_vecs[i]);CHKERRQ(ierr); 1492 } 1493 1494 /* compute local quad vec */ 1495 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1496 if (!transpose) { 1497 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1498 } else { 1499 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1500 } 1501 ierr = VecSet(p,1.);CHKERRQ(ierr); 1502 if (!transpose) { 1503 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1504 } else { 1505 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1506 } 1507 if (vl2l) { 1508 Mat lA; 1509 VecScatter sc; 1510 1511 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1512 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1513 ierr = VecScatterCreate(v,NULL,vins,vl2l,&sc);CHKERRQ(ierr); 1514 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1515 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1516 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1517 } else { 1518 vins = v; 1519 } 1520 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1521 ierr = VecDestroy(&p);CHKERRQ(ierr); 1522 1523 /* insert in global quadrature vecs */ 1524 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1525 for (i=0;i<n_neigh;i++) { 1526 const PetscInt *idxs; 1527 PetscInt idx,nn,j; 1528 1529 idxs = shared[i]; 1530 nn = n_shared[i]; 1531 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1532 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1533 idx = -(idx+1); 1534 ierr = ISLocalToGlobalMappingApply(map,nn,idxs,gidxs);CHKERRQ(ierr); 1535 ierr = VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1536 } 1537 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1538 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1539 if (vl2l) { 1540 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1541 } 1542 ierr = VecDestroy(&v);CHKERRQ(ierr); 1543 ierr = PetscFree2(gidxs,vals);CHKERRQ(ierr); 1544 1545 /* assemble near null space */ 1546 for (i=0;i<maxneighs;i++) { 1547 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1548 } 1549 for (i=0;i<maxneighs;i++) { 1550 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1551 ierr = VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");CHKERRQ(ierr); 1552 ierr = VecLockReadPush(quad_vecs[i]);CHKERRQ(ierr); 1553 } 1554 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1555 PetscFunctionReturn(0); 1556 } 1557 1558 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1559 { 1560 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1561 PetscErrorCode ierr; 1562 1563 PetscFunctionBegin; 1564 if (primalv) { 1565 if (pcbddc->user_primal_vertices_local) { 1566 IS list[2], newp; 1567 1568 list[0] = primalv; 1569 list[1] = pcbddc->user_primal_vertices_local; 1570 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1571 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1572 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1573 pcbddc->user_primal_vertices_local = newp; 1574 } else { 1575 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1576 } 1577 } 1578 PetscFunctionReturn(0); 1579 } 1580 1581 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1582 { 1583 PetscInt f, *comp = (PetscInt *)ctx; 1584 1585 PetscFunctionBegin; 1586 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1587 PetscFunctionReturn(0); 1588 } 1589 1590 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1591 { 1592 PetscErrorCode ierr; 1593 Vec local,global; 1594 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1595 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1596 PetscBool monolithic = PETSC_FALSE; 1597 1598 PetscFunctionBegin; 1599 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1600 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1601 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1602 /* need to convert from global to local topology information and remove references to information in global ordering */ 1603 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1604 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1605 ierr = VecBindToCPU(global,PETSC_TRUE);CHKERRQ(ierr); 1606 ierr = VecBindToCPU(local,PETSC_TRUE);CHKERRQ(ierr); 1607 if (monolithic) { /* just get block size to properly compute vertices */ 1608 if (pcbddc->vertex_size == 1) { 1609 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1610 } 1611 goto boundary; 1612 } 1613 1614 if (pcbddc->user_provided_isfordofs) { 1615 if (pcbddc->n_ISForDofs) { 1616 PetscInt i; 1617 1618 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1619 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1620 PetscInt bs; 1621 1622 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1623 ierr = ISGetBlockSize(pcbddc->ISForDofs[i],&bs);CHKERRQ(ierr); 1624 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1625 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1626 } 1627 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1628 pcbddc->n_ISForDofs = 0; 1629 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1630 } 1631 } else { 1632 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1633 DM dm; 1634 1635 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1636 if (!dm) { 1637 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1638 } 1639 if (dm) { 1640 IS *fields; 1641 PetscInt nf,i; 1642 1643 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1644 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1645 for (i=0;i<nf;i++) { 1646 PetscInt bs; 1647 1648 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1649 ierr = ISGetBlockSize(fields[i],&bs);CHKERRQ(ierr); 1650 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1651 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1652 } 1653 ierr = PetscFree(fields);CHKERRQ(ierr); 1654 pcbddc->n_ISForDofsLocal = nf; 1655 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1656 PetscContainer c; 1657 1658 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1659 if (c) { 1660 MatISLocalFields lf; 1661 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1662 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1663 } else { /* fallback, create the default fields if bs > 1 */ 1664 PetscInt i, n = matis->A->rmap->n; 1665 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1666 if (i > 1) { 1667 pcbddc->n_ISForDofsLocal = i; 1668 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1669 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1670 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1671 } 1672 } 1673 } 1674 } 1675 } else { 1676 PetscInt i; 1677 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1678 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1679 } 1680 } 1681 } 1682 1683 boundary: 1684 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1685 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1686 } else if (pcbddc->DirichletBoundariesLocal) { 1687 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1688 } 1689 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1690 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1691 } else if (pcbddc->NeumannBoundariesLocal) { 1692 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1693 } 1694 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1695 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1696 } 1697 ierr = VecDestroy(&global);CHKERRQ(ierr); 1698 ierr = VecDestroy(&local);CHKERRQ(ierr); 1699 /* detect local disconnected subdomains if requested (use matis->A) */ 1700 if (pcbddc->detect_disconnected) { 1701 IS primalv = NULL; 1702 PetscInt i; 1703 PetscBool filter = pcbddc->detect_disconnected_filter; 1704 1705 for (i=0;i<pcbddc->n_local_subs;i++) { 1706 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1707 } 1708 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1709 ierr = PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1710 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1711 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1712 } 1713 /* early stage corner detection */ 1714 { 1715 DM dm; 1716 1717 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1718 if (!dm) { 1719 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1720 } 1721 if (dm) { 1722 PetscBool isda; 1723 1724 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1725 if (isda) { 1726 ISLocalToGlobalMapping l2l; 1727 IS corners; 1728 Mat lA; 1729 PetscBool gl,lo; 1730 1731 { 1732 Vec cvec; 1733 const PetscScalar *coords; 1734 PetscInt dof,n,cdim; 1735 PetscBool memc = PETSC_TRUE; 1736 1737 ierr = DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 1738 ierr = DMGetCoordinates(dm,&cvec);CHKERRQ(ierr); 1739 ierr = VecGetLocalSize(cvec,&n);CHKERRQ(ierr); 1740 ierr = VecGetBlockSize(cvec,&cdim);CHKERRQ(ierr); 1741 n /= cdim; 1742 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 1743 ierr = PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords);CHKERRQ(ierr); 1744 ierr = VecGetArrayRead(cvec,&coords);CHKERRQ(ierr); 1745 #if defined(PETSC_USE_COMPLEX) 1746 memc = PETSC_FALSE; 1747 #endif 1748 if (dof != 1) memc = PETSC_FALSE; 1749 if (memc) { 1750 ierr = PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof);CHKERRQ(ierr); 1751 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1752 PetscReal *bcoords = pcbddc->mat_graph->coords; 1753 PetscInt i, b, d; 1754 1755 for (i=0;i<n;i++) { 1756 for (b=0;b<dof;b++) { 1757 for (d=0;d<cdim;d++) { 1758 bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]); 1759 } 1760 } 1761 } 1762 } 1763 ierr = VecRestoreArrayRead(cvec,&coords);CHKERRQ(ierr); 1764 pcbddc->mat_graph->cdim = cdim; 1765 pcbddc->mat_graph->cnloc = dof*n; 1766 pcbddc->mat_graph->cloc = PETSC_FALSE; 1767 } 1768 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1769 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1770 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1771 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1772 lo = (PetscBool)(l2l && corners); 1773 ierr = MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1774 if (gl) { /* From PETSc's DMDA */ 1775 const PetscInt *idx; 1776 PetscInt dof,bs,*idxout,n; 1777 1778 ierr = DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 1779 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1780 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1781 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1782 if (bs == dof) { 1783 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1784 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1785 } else { /* the original DMDA local-to-local map have been modified */ 1786 PetscInt i,d; 1787 1788 ierr = PetscMalloc1(dof*n,&idxout);CHKERRQ(ierr); 1789 for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d; 1790 ierr = ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);CHKERRQ(ierr); 1791 1792 bs = 1; 1793 n *= dof; 1794 } 1795 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1796 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1797 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1798 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1799 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1800 pcbddc->corner_selected = PETSC_TRUE; 1801 pcbddc->corner_selection = PETSC_TRUE; 1802 } 1803 if (corners) { 1804 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1805 } 1806 } 1807 } 1808 } 1809 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1810 DM dm; 1811 1812 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1813 if (!dm) { 1814 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1815 } 1816 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1817 Vec vcoords; 1818 PetscSection section; 1819 PetscReal *coords; 1820 PetscInt d,cdim,nl,nf,**ctxs; 1821 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1822 1823 ierr = DMGetCoordinateDim(dm,&cdim);CHKERRQ(ierr); 1824 ierr = DMGetLocalSection(dm,§ion);CHKERRQ(ierr); 1825 ierr = PetscSectionGetNumFields(section,&nf);CHKERRQ(ierr); 1826 ierr = DMCreateGlobalVector(dm,&vcoords);CHKERRQ(ierr); 1827 ierr = VecGetLocalSize(vcoords,&nl);CHKERRQ(ierr); 1828 ierr = PetscMalloc1(nl*cdim,&coords);CHKERRQ(ierr); 1829 ierr = PetscMalloc2(nf,&funcs,nf,&ctxs);CHKERRQ(ierr); 1830 ierr = PetscMalloc1(nf,&ctxs[0]);CHKERRQ(ierr); 1831 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1832 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1833 for (d=0;d<cdim;d++) { 1834 PetscInt i; 1835 const PetscScalar *v; 1836 1837 for (i=0;i<nf;i++) ctxs[i][0] = d; 1838 ierr = DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);CHKERRQ(ierr); 1839 ierr = VecGetArrayRead(vcoords,&v);CHKERRQ(ierr); 1840 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1841 ierr = VecRestoreArrayRead(vcoords,&v);CHKERRQ(ierr); 1842 } 1843 ierr = VecDestroy(&vcoords);CHKERRQ(ierr); 1844 ierr = PCSetCoordinates(pc,cdim,nl,coords);CHKERRQ(ierr); 1845 ierr = PetscFree(coords);CHKERRQ(ierr); 1846 ierr = PetscFree(ctxs[0]);CHKERRQ(ierr); 1847 ierr = PetscFree2(funcs,ctxs);CHKERRQ(ierr); 1848 } 1849 } 1850 PetscFunctionReturn(0); 1851 } 1852 1853 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1854 { 1855 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1856 PetscErrorCode ierr; 1857 IS nis; 1858 const PetscInt *idxs; 1859 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1860 PetscBool *ld; 1861 1862 PetscFunctionBegin; 1863 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1864 if (mop == MPI_LAND) { 1865 /* init rootdata with true */ 1866 ld = (PetscBool*) matis->sf_rootdata; 1867 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1868 } else { 1869 ierr = PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);CHKERRQ(ierr); 1870 } 1871 ierr = PetscArrayzero(matis->sf_leafdata,n);CHKERRQ(ierr); 1872 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1873 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1874 ld = (PetscBool*) matis->sf_leafdata; 1875 for (i=0;i<nd;i++) 1876 if (-1 < idxs[i] && idxs[i] < n) 1877 ld[idxs[i]] = PETSC_TRUE; 1878 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1879 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1880 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1881 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1882 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1883 if (mop == MPI_LAND) { 1884 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1885 } else { 1886 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1887 } 1888 for (i=0,nnd=0;i<n;i++) 1889 if (ld[i]) 1890 nidxs[nnd++] = i; 1891 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1892 ierr = ISDestroy(is);CHKERRQ(ierr); 1893 *is = nis; 1894 PetscFunctionReturn(0); 1895 } 1896 1897 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1898 { 1899 PC_IS *pcis = (PC_IS*)(pc->data); 1900 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1901 PetscErrorCode ierr; 1902 1903 PetscFunctionBegin; 1904 if (!pcbddc->benign_have_null) { 1905 PetscFunctionReturn(0); 1906 } 1907 if (pcbddc->ChangeOfBasisMatrix) { 1908 Vec swap; 1909 1910 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1911 swap = pcbddc->work_change; 1912 pcbddc->work_change = r; 1913 r = swap; 1914 } 1915 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1916 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1917 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1918 ierr = KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);CHKERRQ(ierr); 1919 ierr = VecSet(z,0.);CHKERRQ(ierr); 1920 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1921 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1922 if (pcbddc->ChangeOfBasisMatrix) { 1923 pcbddc->work_change = r; 1924 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1925 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1926 } 1927 PetscFunctionReturn(0); 1928 } 1929 1930 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1931 { 1932 PCBDDCBenignMatMult_ctx ctx; 1933 PetscErrorCode ierr; 1934 PetscBool apply_right,apply_left,reset_x; 1935 1936 PetscFunctionBegin; 1937 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1938 if (transpose) { 1939 apply_right = ctx->apply_left; 1940 apply_left = ctx->apply_right; 1941 } else { 1942 apply_right = ctx->apply_right; 1943 apply_left = ctx->apply_left; 1944 } 1945 reset_x = PETSC_FALSE; 1946 if (apply_right) { 1947 const PetscScalar *ax; 1948 PetscInt nl,i; 1949 1950 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1951 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1952 ierr = PetscArraycpy(ctx->work,ax,nl);CHKERRQ(ierr); 1953 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1954 for (i=0;i<ctx->benign_n;i++) { 1955 PetscScalar sum,val; 1956 const PetscInt *idxs; 1957 PetscInt nz,j; 1958 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1959 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1960 sum = 0.; 1961 if (ctx->apply_p0) { 1962 val = ctx->work[idxs[nz-1]]; 1963 for (j=0;j<nz-1;j++) { 1964 sum += ctx->work[idxs[j]]; 1965 ctx->work[idxs[j]] += val; 1966 } 1967 } else { 1968 for (j=0;j<nz-1;j++) { 1969 sum += ctx->work[idxs[j]]; 1970 } 1971 } 1972 ctx->work[idxs[nz-1]] -= sum; 1973 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1974 } 1975 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1976 reset_x = PETSC_TRUE; 1977 } 1978 if (transpose) { 1979 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1980 } else { 1981 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1982 } 1983 if (reset_x) { 1984 ierr = VecResetArray(x);CHKERRQ(ierr); 1985 } 1986 if (apply_left) { 1987 PetscScalar *ay; 1988 PetscInt i; 1989 1990 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1991 for (i=0;i<ctx->benign_n;i++) { 1992 PetscScalar sum,val; 1993 const PetscInt *idxs; 1994 PetscInt nz,j; 1995 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1996 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1997 val = -ay[idxs[nz-1]]; 1998 if (ctx->apply_p0) { 1999 sum = 0.; 2000 for (j=0;j<nz-1;j++) { 2001 sum += ay[idxs[j]]; 2002 ay[idxs[j]] += val; 2003 } 2004 ay[idxs[nz-1]] += sum; 2005 } else { 2006 for (j=0;j<nz-1;j++) { 2007 ay[idxs[j]] += val; 2008 } 2009 ay[idxs[nz-1]] = 0.; 2010 } 2011 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2012 } 2013 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 2014 } 2015 PetscFunctionReturn(0); 2016 } 2017 2018 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2019 { 2020 PetscErrorCode ierr; 2021 2022 PetscFunctionBegin; 2023 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 2024 PetscFunctionReturn(0); 2025 } 2026 2027 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2028 { 2029 PetscErrorCode ierr; 2030 2031 PetscFunctionBegin; 2032 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 2033 PetscFunctionReturn(0); 2034 } 2035 2036 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2037 { 2038 PC_IS *pcis = (PC_IS*)pc->data; 2039 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2040 PCBDDCBenignMatMult_ctx ctx; 2041 PetscErrorCode ierr; 2042 2043 PetscFunctionBegin; 2044 if (!restore) { 2045 Mat A_IB,A_BI; 2046 PetscScalar *work; 2047 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2048 2049 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2050 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2051 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2052 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2053 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2054 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2055 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2056 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2057 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2058 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2059 ctx->apply_left = PETSC_TRUE; 2060 ctx->apply_right = PETSC_FALSE; 2061 ctx->apply_p0 = PETSC_FALSE; 2062 ctx->benign_n = pcbddc->benign_n; 2063 if (reuse) { 2064 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2065 ctx->free = PETSC_FALSE; 2066 } else { /* TODO: could be optimized for successive solves */ 2067 ISLocalToGlobalMapping N_to_D; 2068 PetscInt i; 2069 2070 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2071 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2072 for (i=0;i<pcbddc->benign_n;i++) { 2073 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2074 } 2075 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2076 ctx->free = PETSC_TRUE; 2077 } 2078 ctx->A = pcis->A_IB; 2079 ctx->work = work; 2080 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2081 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2082 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2083 pcis->A_IB = A_IB; 2084 2085 /* A_BI as A_IB^T */ 2086 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2087 pcbddc->benign_original_mat = pcis->A_BI; 2088 pcis->A_BI = A_BI; 2089 } else { 2090 if (!pcbddc->benign_original_mat) { 2091 PetscFunctionReturn(0); 2092 } 2093 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2094 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2095 pcis->A_IB = ctx->A; 2096 ctx->A = NULL; 2097 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2098 pcis->A_BI = pcbddc->benign_original_mat; 2099 pcbddc->benign_original_mat = NULL; 2100 if (ctx->free) { 2101 PetscInt i; 2102 for (i=0;i<ctx->benign_n;i++) { 2103 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2104 } 2105 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2106 } 2107 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2108 ierr = PetscFree(ctx);CHKERRQ(ierr); 2109 } 2110 PetscFunctionReturn(0); 2111 } 2112 2113 /* used just in bddc debug mode */ 2114 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2115 { 2116 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2117 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2118 Mat An; 2119 PetscErrorCode ierr; 2120 2121 PetscFunctionBegin; 2122 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2123 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2124 if (is1) { 2125 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2126 ierr = MatDestroy(&An);CHKERRQ(ierr); 2127 } else { 2128 *B = An; 2129 } 2130 PetscFunctionReturn(0); 2131 } 2132 2133 /* TODO: add reuse flag */ 2134 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2135 { 2136 Mat Bt; 2137 PetscScalar *a,*bdata; 2138 const PetscInt *ii,*ij; 2139 PetscInt m,n,i,nnz,*bii,*bij; 2140 PetscBool flg_row; 2141 PetscErrorCode ierr; 2142 2143 PetscFunctionBegin; 2144 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2145 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2146 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2147 nnz = n; 2148 for (i=0;i<ii[n];i++) { 2149 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2150 } 2151 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2152 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2153 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2154 nnz = 0; 2155 bii[0] = 0; 2156 for (i=0;i<n;i++) { 2157 PetscInt j; 2158 for (j=ii[i];j<ii[i+1];j++) { 2159 PetscScalar entry = a[j]; 2160 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2161 bij[nnz] = ij[j]; 2162 bdata[nnz] = entry; 2163 nnz++; 2164 } 2165 } 2166 bii[i+1] = nnz; 2167 } 2168 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2169 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2170 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2171 { 2172 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2173 b->free_a = PETSC_TRUE; 2174 b->free_ij = PETSC_TRUE; 2175 } 2176 if (*B == A) { 2177 ierr = MatDestroy(&A);CHKERRQ(ierr); 2178 } 2179 *B = Bt; 2180 PetscFunctionReturn(0); 2181 } 2182 2183 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv) 2184 { 2185 Mat B = NULL; 2186 DM dm; 2187 IS is_dummy,*cc_n; 2188 ISLocalToGlobalMapping l2gmap_dummy; 2189 PCBDDCGraph graph; 2190 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2191 PetscInt i,n; 2192 PetscInt *xadj,*adjncy; 2193 PetscBool isplex = PETSC_FALSE; 2194 PetscErrorCode ierr; 2195 2196 PetscFunctionBegin; 2197 if (ncc) *ncc = 0; 2198 if (cc) *cc = NULL; 2199 if (primalv) *primalv = NULL; 2200 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2201 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2202 if (!dm) { 2203 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2204 } 2205 if (dm) { 2206 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2207 } 2208 if (filter) isplex = PETSC_FALSE; 2209 2210 if (isplex) { /* this code has been modified from plexpartition.c */ 2211 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2212 PetscInt *adj = NULL; 2213 IS cellNumbering; 2214 const PetscInt *cellNum; 2215 PetscBool useCone, useClosure; 2216 PetscSection section; 2217 PetscSegBuffer adjBuffer; 2218 PetscSF sfPoint; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2223 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2224 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2225 /* Build adjacency graph via a section/segbuffer */ 2226 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2227 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2228 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2229 /* Always use FVM adjacency to create partitioner graph */ 2230 ierr = DMGetBasicAdjacency(dm, &useCone, &useClosure);CHKERRQ(ierr); 2231 ierr = DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);CHKERRQ(ierr); 2232 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2233 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2234 for (n = 0, p = pStart; p < pEnd; p++) { 2235 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2236 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2237 adjSize = PETSC_DETERMINE; 2238 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2239 for (a = 0; a < adjSize; ++a) { 2240 const PetscInt point = adj[a]; 2241 if (pStart <= point && point < pEnd) { 2242 PetscInt *PETSC_RESTRICT pBuf; 2243 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2244 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2245 *pBuf = point; 2246 } 2247 } 2248 n++; 2249 } 2250 ierr = DMSetBasicAdjacency(dm, useCone, useClosure);CHKERRQ(ierr); 2251 /* Derive CSR graph from section/segbuffer */ 2252 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2253 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2254 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2255 for (idx = 0, p = pStart; p < pEnd; p++) { 2256 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2257 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2258 } 2259 xadj[n] = size; 2260 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2261 /* Clean up */ 2262 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2263 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2264 ierr = PetscFree(adj);CHKERRQ(ierr); 2265 graph->xadj = xadj; 2266 graph->adjncy = adjncy; 2267 } else { 2268 Mat A; 2269 PetscBool isseqaij, flg_row; 2270 2271 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2272 if (!A->rmap->N || !A->cmap->N) { 2273 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2274 PetscFunctionReturn(0); 2275 } 2276 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2277 if (!isseqaij && filter) { 2278 PetscBool isseqdense; 2279 2280 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2281 if (!isseqdense) { 2282 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2283 } else { /* TODO: rectangular case and LDA */ 2284 PetscScalar *array; 2285 PetscReal chop=1.e-6; 2286 2287 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2288 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2289 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2290 for (i=0;i<n;i++) { 2291 PetscInt j; 2292 for (j=i+1;j<n;j++) { 2293 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2294 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2295 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2296 } 2297 } 2298 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2299 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2300 } 2301 } else { 2302 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2303 B = A; 2304 } 2305 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2306 2307 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2308 if (filter) { 2309 PetscScalar *data; 2310 PetscInt j,cum; 2311 2312 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2313 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2314 cum = 0; 2315 for (i=0;i<n;i++) { 2316 PetscInt t; 2317 2318 for (j=xadj[i];j<xadj[i+1];j++) { 2319 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2320 continue; 2321 } 2322 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2323 } 2324 t = xadj_filtered[i]; 2325 xadj_filtered[i] = cum; 2326 cum += t; 2327 } 2328 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2329 graph->xadj = xadj_filtered; 2330 graph->adjncy = adjncy_filtered; 2331 } else { 2332 graph->xadj = xadj; 2333 graph->adjncy = adjncy; 2334 } 2335 } 2336 /* compute local connected components using PCBDDCGraph */ 2337 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2338 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2339 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2340 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2341 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2342 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2343 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2344 2345 /* partial clean up */ 2346 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2347 if (B) { 2348 PetscBool flg_row; 2349 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2350 ierr = MatDestroy(&B);CHKERRQ(ierr); 2351 } 2352 if (isplex) { 2353 ierr = PetscFree(xadj);CHKERRQ(ierr); 2354 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2355 } 2356 2357 /* get back data */ 2358 if (isplex) { 2359 if (ncc) *ncc = graph->ncc; 2360 if (cc || primalv) { 2361 Mat A; 2362 PetscBT btv,btvt; 2363 PetscSection subSection; 2364 PetscInt *ids,cum,cump,*cids,*pids; 2365 2366 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2367 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2368 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2369 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2370 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2371 2372 cids[0] = 0; 2373 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2374 PetscInt j; 2375 2376 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2377 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2378 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2379 2380 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2381 for (k = 0; k < 2*size; k += 2) { 2382 PetscInt s, pp, p = closure[k], off, dof, cdof; 2383 2384 ierr = PetscSectionGetConstraintDof(subSection,p,&cdof);CHKERRQ(ierr); 2385 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2386 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2387 for (s = 0; s < dof-cdof; s++) { 2388 if (PetscBTLookupSet(btvt,off+s)) continue; 2389 if (!PetscBTLookup(btv,off+s)) { 2390 ids[cum++] = off+s; 2391 } else { /* cross-vertex */ 2392 pids[cump++] = off+s; 2393 } 2394 } 2395 ierr = DMPlexGetTreeParent(dm,p,&pp,NULL);CHKERRQ(ierr); 2396 if (pp != p) { 2397 ierr = PetscSectionGetConstraintDof(subSection,pp,&cdof);CHKERRQ(ierr); 2398 ierr = PetscSectionGetOffset(subSection,pp,&off);CHKERRQ(ierr); 2399 ierr = PetscSectionGetDof(subSection,pp,&dof);CHKERRQ(ierr); 2400 for (s = 0; s < dof-cdof; s++) { 2401 if (PetscBTLookupSet(btvt,off+s)) continue; 2402 if (!PetscBTLookup(btv,off+s)) { 2403 ids[cum++] = off+s; 2404 } else { /* cross-vertex */ 2405 pids[cump++] = off+s; 2406 } 2407 } 2408 } 2409 } 2410 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2411 } 2412 cids[i+1] = cum; 2413 /* mark dofs as already assigned */ 2414 for (j = cids[i]; j < cids[i+1]; j++) { 2415 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2416 } 2417 } 2418 if (cc) { 2419 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2420 for (i = 0; i < graph->ncc; i++) { 2421 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2422 } 2423 *cc = cc_n; 2424 } 2425 if (primalv) { 2426 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2427 } 2428 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2429 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2430 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2431 } 2432 } else { 2433 if (ncc) *ncc = graph->ncc; 2434 if (cc) { 2435 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2436 for (i=0;i<graph->ncc;i++) { 2437 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2438 } 2439 *cc = cc_n; 2440 } 2441 } 2442 /* clean up graph */ 2443 graph->xadj = NULL; 2444 graph->adjncy = NULL; 2445 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2446 PetscFunctionReturn(0); 2447 } 2448 2449 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2450 { 2451 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2452 PC_IS* pcis = (PC_IS*)(pc->data); 2453 IS dirIS = NULL; 2454 PetscInt i; 2455 PetscErrorCode ierr; 2456 2457 PetscFunctionBegin; 2458 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2459 if (zerodiag) { 2460 Mat A; 2461 Vec vec3_N; 2462 PetscScalar *vals; 2463 const PetscInt *idxs; 2464 PetscInt nz,*count; 2465 2466 /* p0 */ 2467 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2468 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2469 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2470 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2471 for (i=0;i<nz;i++) vals[i] = 1.; 2472 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2473 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2474 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2475 /* v_I */ 2476 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2477 for (i=0;i<nz;i++) vals[i] = 0.; 2478 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2479 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2480 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2481 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2482 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2483 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2484 if (dirIS) { 2485 PetscInt n; 2486 2487 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2488 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2489 for (i=0;i<n;i++) vals[i] = 0.; 2490 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2491 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2492 } 2493 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2494 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2495 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2496 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2497 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2498 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2499 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2500 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2501 ierr = PetscFree(vals);CHKERRQ(ierr); 2502 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2503 2504 /* there should not be any pressure dofs lying on the interface */ 2505 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2506 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2507 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2508 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2509 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2510 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]); 2511 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2512 ierr = PetscFree(count);CHKERRQ(ierr); 2513 } 2514 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2515 2516 /* check PCBDDCBenignGetOrSetP0 */ 2517 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2518 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2519 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2520 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2521 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2522 for (i=0;i<pcbddc->benign_n;i++) { 2523 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2524 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2525 } 2526 PetscFunctionReturn(0); 2527 } 2528 2529 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2530 { 2531 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2532 IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs; 2533 PetscInt nz,n,benign_n,bsp = 1; 2534 PetscInt *interior_dofs,n_interior_dofs,nneu; 2535 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2536 PetscErrorCode ierr; 2537 2538 PetscFunctionBegin; 2539 if (reuse) goto project_b0; 2540 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2541 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2542 for (n=0;n<pcbddc->benign_n;n++) { 2543 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2544 } 2545 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2546 has_null_pressures = PETSC_TRUE; 2547 have_null = PETSC_TRUE; 2548 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2549 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2550 Checks if all the pressure dofs in each subdomain have a zero diagonal 2551 If not, a change of basis on pressures is not needed 2552 since the local Schur complements are already SPD 2553 */ 2554 if (pcbddc->n_ISForDofsLocal) { 2555 IS iP = NULL; 2556 PetscInt p,*pp; 2557 PetscBool flg; 2558 2559 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp);CHKERRQ(ierr); 2560 n = pcbddc->n_ISForDofsLocal; 2561 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2562 ierr = PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg);CHKERRQ(ierr); 2563 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2564 if (!flg) { 2565 n = 1; 2566 pp[0] = pcbddc->n_ISForDofsLocal-1; 2567 } 2568 2569 bsp = 0; 2570 for (p=0;p<n;p++) { 2571 PetscInt bs; 2572 2573 if (pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]); 2574 ierr = ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);CHKERRQ(ierr); 2575 bsp += bs; 2576 } 2577 ierr = PetscMalloc1(bsp,&bzerodiag);CHKERRQ(ierr); 2578 bsp = 0; 2579 for (p=0;p<n;p++) { 2580 const PetscInt *idxs; 2581 PetscInt b,bs,npl,*bidxs; 2582 2583 ierr = ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);CHKERRQ(ierr); 2584 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl);CHKERRQ(ierr); 2585 ierr = ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);CHKERRQ(ierr); 2586 ierr = PetscMalloc1(npl/bs,&bidxs);CHKERRQ(ierr); 2587 for (b=0;b<bs;b++) { 2588 PetscInt i; 2589 2590 for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b]; 2591 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]);CHKERRQ(ierr); 2592 bsp++; 2593 } 2594 ierr = PetscFree(bidxs);CHKERRQ(ierr); 2595 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);CHKERRQ(ierr); 2596 } 2597 ierr = ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures);CHKERRQ(ierr); 2598 2599 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2600 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2601 if (iP) { 2602 IS newpressures; 2603 2604 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2605 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2606 pressures = newpressures; 2607 } 2608 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2609 if (!sorted) { 2610 ierr = ISSort(pressures);CHKERRQ(ierr); 2611 } 2612 ierr = PetscFree(pp);CHKERRQ(ierr); 2613 } 2614 2615 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2616 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2617 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2618 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2619 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2620 if (!sorted) { 2621 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2622 } 2623 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2624 zerodiag_save = zerodiag; 2625 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2626 if (!nz) { 2627 if (n) have_null = PETSC_FALSE; 2628 has_null_pressures = PETSC_FALSE; 2629 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2630 } 2631 recompute_zerodiag = PETSC_FALSE; 2632 2633 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2634 zerodiag_subs = NULL; 2635 benign_n = 0; 2636 n_interior_dofs = 0; 2637 interior_dofs = NULL; 2638 nneu = 0; 2639 if (pcbddc->NeumannBoundariesLocal) { 2640 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2641 } 2642 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2643 if (checkb) { /* need to compute interior nodes */ 2644 PetscInt n,i,j; 2645 PetscInt n_neigh,*neigh,*n_shared,**shared; 2646 PetscInt *iwork; 2647 2648 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2649 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2650 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2651 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2652 for (i=1;i<n_neigh;i++) 2653 for (j=0;j<n_shared[i];j++) 2654 iwork[shared[i][j]] += 1; 2655 for (i=0;i<n;i++) 2656 if (!iwork[i]) 2657 interior_dofs[n_interior_dofs++] = i; 2658 ierr = PetscFree(iwork);CHKERRQ(ierr); 2659 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2660 } 2661 if (has_null_pressures) { 2662 IS *subs; 2663 PetscInt nsubs,i,j,nl; 2664 const PetscInt *idxs; 2665 PetscScalar *array; 2666 Vec *work; 2667 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2668 2669 subs = pcbddc->local_subs; 2670 nsubs = pcbddc->n_local_subs; 2671 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2672 if (checkb) { 2673 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2674 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2675 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2676 /* work[0] = 1_p */ 2677 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2678 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2679 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2680 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2681 /* work[0] = 1_v */ 2682 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2683 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2684 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2685 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2686 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2687 } 2688 2689 if (nsubs > 1 || bsp > 1) { 2690 IS *is; 2691 PetscInt b,totb; 2692 2693 totb = bsp; 2694 is = bsp > 1 ? bzerodiag : &zerodiag; 2695 nsubs = PetscMax(nsubs,1); 2696 ierr = PetscCalloc1(nsubs*totb,&zerodiag_subs);CHKERRQ(ierr); 2697 for (b=0;b<totb;b++) { 2698 for (i=0;i<nsubs;i++) { 2699 ISLocalToGlobalMapping l2g; 2700 IS t_zerodiag_subs; 2701 PetscInt nl; 2702 2703 if (subs) { 2704 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2705 } else { 2706 IS tis; 2707 2708 ierr = MatGetLocalSize(pcbddc->local_mat,&nl,NULL);CHKERRQ(ierr); 2709 ierr = ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis);CHKERRQ(ierr); 2710 ierr = ISLocalToGlobalMappingCreateIS(tis,&l2g);CHKERRQ(ierr); 2711 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2712 } 2713 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs);CHKERRQ(ierr); 2714 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2715 if (nl) { 2716 PetscBool valid = PETSC_TRUE; 2717 2718 if (checkb) { 2719 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2720 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2721 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2722 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2723 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2724 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2725 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2726 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2727 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2728 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2729 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2730 for (j=0;j<n_interior_dofs;j++) { 2731 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2732 valid = PETSC_FALSE; 2733 break; 2734 } 2735 } 2736 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2737 } 2738 if (valid && nneu) { 2739 const PetscInt *idxs; 2740 PetscInt nzb; 2741 2742 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2743 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2744 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2745 if (nzb) valid = PETSC_FALSE; 2746 } 2747 if (valid && pressures) { 2748 IS t_pressure_subs,tmp; 2749 PetscInt i1,i2; 2750 2751 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2752 ierr = ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp);CHKERRQ(ierr); 2753 ierr = ISGetLocalSize(tmp,&i1);CHKERRQ(ierr); 2754 ierr = ISGetLocalSize(t_zerodiag_subs,&i2);CHKERRQ(ierr); 2755 if (i2 != i1) valid = PETSC_FALSE; 2756 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2757 ierr = ISDestroy(&tmp);CHKERRQ(ierr); 2758 } 2759 if (valid) { 2760 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]);CHKERRQ(ierr); 2761 benign_n++; 2762 } else recompute_zerodiag = PETSC_TRUE; 2763 } 2764 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2765 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2766 } 2767 } 2768 } else { /* there's just one subdomain (or zero if they have not been detected */ 2769 PetscBool valid = PETSC_TRUE; 2770 2771 if (nneu) valid = PETSC_FALSE; 2772 if (valid && pressures) { 2773 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2774 } 2775 if (valid && checkb) { 2776 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2777 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2778 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2779 for (j=0;j<n_interior_dofs;j++) { 2780 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2781 valid = PETSC_FALSE; 2782 break; 2783 } 2784 } 2785 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2786 } 2787 if (valid) { 2788 benign_n = 1; 2789 ierr = PetscMalloc1(benign_n,&zerodiag_subs);CHKERRQ(ierr); 2790 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2791 zerodiag_subs[0] = zerodiag; 2792 } 2793 } 2794 if (checkb) { 2795 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2796 } 2797 } 2798 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2799 2800 if (!benign_n) { 2801 PetscInt n; 2802 2803 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2804 recompute_zerodiag = PETSC_FALSE; 2805 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2806 if (n) have_null = PETSC_FALSE; 2807 } 2808 2809 /* final check for null pressures */ 2810 if (zerodiag && pressures) { 2811 ierr = ISEqual(pressures,zerodiag,&have_null);CHKERRQ(ierr); 2812 } 2813 2814 if (recompute_zerodiag) { 2815 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2816 if (benign_n == 1) { 2817 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2818 zerodiag = zerodiag_subs[0]; 2819 } else { 2820 PetscInt i,nzn,*new_idxs; 2821 2822 nzn = 0; 2823 for (i=0;i<benign_n;i++) { 2824 PetscInt ns; 2825 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2826 nzn += ns; 2827 } 2828 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2829 nzn = 0; 2830 for (i=0;i<benign_n;i++) { 2831 PetscInt ns,*idxs; 2832 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2833 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2834 ierr = PetscArraycpy(new_idxs+nzn,idxs,ns);CHKERRQ(ierr); 2835 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2836 nzn += ns; 2837 } 2838 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2839 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2840 } 2841 have_null = PETSC_FALSE; 2842 } 2843 2844 /* determines if the coarse solver will be singular or not */ 2845 ierr = MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2846 2847 /* Prepare matrix to compute no-net-flux */ 2848 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2849 Mat A,loc_divudotp; 2850 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2851 IS row,col,isused = NULL; 2852 PetscInt M,N,n,st,n_isused; 2853 2854 if (pressures) { 2855 isused = pressures; 2856 } else { 2857 isused = zerodiag_save; 2858 } 2859 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2860 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2861 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2862 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2863 n_isused = 0; 2864 if (isused) { 2865 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2866 } 2867 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2868 st = st-n_isused; 2869 if (n) { 2870 const PetscInt *gidxs; 2871 2872 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2873 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2874 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2875 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2876 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2877 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2878 } else { 2879 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2880 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2881 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2882 } 2883 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2884 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2885 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2886 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2887 ierr = ISDestroy(&row);CHKERRQ(ierr); 2888 ierr = ISDestroy(&col);CHKERRQ(ierr); 2889 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2890 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2891 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2892 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2893 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2894 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2895 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2896 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2897 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2898 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2899 } 2900 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2901 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2902 if (bzerodiag) { 2903 PetscInt i; 2904 2905 for (i=0;i<bsp;i++) { 2906 ierr = ISDestroy(&bzerodiag[i]);CHKERRQ(ierr); 2907 } 2908 ierr = PetscFree(bzerodiag);CHKERRQ(ierr); 2909 } 2910 pcbddc->benign_n = benign_n; 2911 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2912 2913 /* determines if the problem has subdomains with 0 pressure block */ 2914 have_null = (PetscBool)(!!pcbddc->benign_n); 2915 ierr = MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2916 2917 project_b0: 2918 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2919 /* change of basis and p0 dofs */ 2920 if (pcbddc->benign_n) { 2921 PetscInt i,s,*nnz; 2922 2923 /* local change of basis for pressures */ 2924 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2925 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2926 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2927 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2928 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2929 for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */ 2930 for (i=0;i<pcbddc->benign_n;i++) { 2931 const PetscInt *idxs; 2932 PetscInt nzs,j; 2933 2934 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2935 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2936 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2937 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2938 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2939 } 2940 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2941 ierr = MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2942 ierr = PetscFree(nnz);CHKERRQ(ierr); 2943 /* set identity by default */ 2944 for (i=0;i<n;i++) { 2945 ierr = MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES);CHKERRQ(ierr); 2946 } 2947 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2948 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2949 /* set change on pressures */ 2950 for (s=0;s<pcbddc->benign_n;s++) { 2951 PetscScalar *array; 2952 const PetscInt *idxs; 2953 PetscInt nzs; 2954 2955 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2956 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2957 for (i=0;i<nzs-1;i++) { 2958 PetscScalar vals[2]; 2959 PetscInt cols[2]; 2960 2961 cols[0] = idxs[i]; 2962 cols[1] = idxs[nzs-1]; 2963 vals[0] = 1.; 2964 vals[1] = 1.; 2965 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2966 } 2967 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2968 for (i=0;i<nzs-1;i++) array[i] = -1.; 2969 array[nzs-1] = 1.; 2970 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2971 /* store local idxs for p0 */ 2972 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2973 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2974 ierr = PetscFree(array);CHKERRQ(ierr); 2975 } 2976 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2977 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2978 2979 /* project if needed */ 2980 if (pcbddc->benign_change_explicit) { 2981 Mat M; 2982 2983 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2984 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2985 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2986 ierr = MatDestroy(&M);CHKERRQ(ierr); 2987 } 2988 /* store global idxs for p0 */ 2989 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2990 } 2991 *zerodiaglocal = zerodiag; 2992 PetscFunctionReturn(0); 2993 } 2994 2995 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2996 { 2997 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2998 PetscScalar *array; 2999 PetscErrorCode ierr; 3000 3001 PetscFunctionBegin; 3002 if (!pcbddc->benign_sf) { 3003 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 3004 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 3005 } 3006 if (get) { 3007 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 3008 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 3009 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 3010 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 3011 } else { 3012 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 3013 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 3014 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 3015 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 3016 } 3017 PetscFunctionReturn(0); 3018 } 3019 3020 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3021 { 3022 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3023 PetscErrorCode ierr; 3024 3025 PetscFunctionBegin; 3026 /* TODO: add error checking 3027 - avoid nested pop (or push) calls. 3028 - cannot push before pop. 3029 - cannot call this if pcbddc->local_mat is NULL 3030 */ 3031 if (!pcbddc->benign_n) { 3032 PetscFunctionReturn(0); 3033 } 3034 if (pop) { 3035 if (pcbddc->benign_change_explicit) { 3036 IS is_p0; 3037 MatReuse reuse; 3038 3039 /* extract B_0 */ 3040 reuse = MAT_INITIAL_MATRIX; 3041 if (pcbddc->benign_B0) { 3042 reuse = MAT_REUSE_MATRIX; 3043 } 3044 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 3045 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 3046 /* remove rows and cols from local problem */ 3047 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 3048 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3049 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 3050 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3051 } else { 3052 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3053 PetscScalar *vals; 3054 PetscInt i,n,*idxs_ins; 3055 3056 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 3057 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 3058 if (!pcbddc->benign_B0) { 3059 PetscInt *nnz; 3060 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 3061 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 3062 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 3063 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 3064 for (i=0;i<pcbddc->benign_n;i++) { 3065 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 3066 nnz[i] = n - nnz[i]; 3067 } 3068 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 3069 ierr = MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3070 ierr = PetscFree(nnz);CHKERRQ(ierr); 3071 } 3072 3073 for (i=0;i<pcbddc->benign_n;i++) { 3074 PetscScalar *array; 3075 PetscInt *idxs,j,nz,cum; 3076 3077 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 3078 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3079 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3080 for (j=0;j<nz;j++) vals[j] = 1.; 3081 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3082 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 3083 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 3084 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 3085 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 3086 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 3087 cum = 0; 3088 for (j=0;j<n;j++) { 3089 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3090 vals[cum] = array[j]; 3091 idxs_ins[cum] = j; 3092 cum++; 3093 } 3094 } 3095 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 3096 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 3097 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3098 } 3099 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3100 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3101 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 3102 } 3103 } else { /* push */ 3104 if (pcbddc->benign_change_explicit) { 3105 PetscInt i; 3106 3107 for (i=0;i<pcbddc->benign_n;i++) { 3108 PetscScalar *B0_vals; 3109 PetscInt *B0_cols,B0_ncol; 3110 3111 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3112 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3113 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3114 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 3115 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3116 } 3117 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3118 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3119 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!"); 3120 } 3121 PetscFunctionReturn(0); 3122 } 3123 3124 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3125 { 3126 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3127 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3128 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3129 PetscBLASInt *B_iwork,*B_ifail; 3130 PetscScalar *work,lwork; 3131 PetscScalar *St,*S,*eigv; 3132 PetscScalar *Sarray,*Starray; 3133 PetscReal *eigs,thresh,lthresh,uthresh; 3134 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3135 PetscBool allocated_S_St; 3136 #if defined(PETSC_USE_COMPLEX) 3137 PetscReal *rwork; 3138 #endif 3139 PetscErrorCode ierr; 3140 3141 PetscFunctionBegin; 3142 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3143 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3144 if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3145 ierr = PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3146 3147 if (pcbddc->dbg_flag) { 3148 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3149 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3150 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3151 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3152 } 3153 3154 if (pcbddc->dbg_flag) { 3155 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);CHKERRQ(ierr); 3156 } 3157 3158 /* max size of subsets */ 3159 mss = 0; 3160 for (i=0;i<sub_schurs->n_subs;i++) { 3161 PetscInt subset_size; 3162 3163 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3164 mss = PetscMax(mss,subset_size); 3165 } 3166 3167 /* min/max and threshold */ 3168 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3169 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3170 nmax = PetscMax(nmin,nmax); 3171 allocated_S_St = PETSC_FALSE; 3172 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3173 allocated_S_St = PETSC_TRUE; 3174 } 3175 3176 /* allocate lapack workspace */ 3177 cum = cum2 = 0; 3178 maxneigs = 0; 3179 for (i=0;i<sub_schurs->n_subs;i++) { 3180 PetscInt n,subset_size; 3181 3182 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3183 n = PetscMin(subset_size,nmax); 3184 cum += subset_size; 3185 cum2 += subset_size*n; 3186 maxneigs = PetscMax(maxneigs,n); 3187 } 3188 lwork = 0; 3189 if (mss) { 3190 if (sub_schurs->is_symmetric) { 3191 PetscScalar sdummy = 0.; 3192 PetscBLASInt B_itype = 1; 3193 PetscBLASInt B_N = mss, idummy = 0; 3194 PetscReal rdummy = 0.,zero = 0.0; 3195 PetscReal eps = 0.0; /* dlamch? */ 3196 3197 B_lwork = -1; 3198 /* some implementations may complain about NULL pointers, even if we are querying */ 3199 S = &sdummy; 3200 St = &sdummy; 3201 eigs = &rdummy; 3202 eigv = &sdummy; 3203 B_iwork = &idummy; 3204 B_ifail = &idummy; 3205 #if defined(PETSC_USE_COMPLEX) 3206 rwork = &rdummy; 3207 #endif 3208 thresh = 1.0; 3209 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3210 #if defined(PETSC_USE_COMPLEX) 3211 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3212 #else 3213 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3214 #endif 3215 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3216 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3217 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3218 } 3219 3220 nv = 0; 3221 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3222 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3223 } 3224 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3225 if (allocated_S_St) { 3226 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3227 } 3228 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3229 #if defined(PETSC_USE_COMPLEX) 3230 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3231 #endif 3232 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3233 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3234 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3235 nv+cum,&pcbddc->adaptive_constraints_idxs, 3236 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3237 ierr = PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs);CHKERRQ(ierr); 3238 3239 maxneigs = 0; 3240 cum = cumarray = 0; 3241 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3242 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3243 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3244 const PetscInt *idxs; 3245 3246 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3247 for (cum=0;cum<nv;cum++) { 3248 pcbddc->adaptive_constraints_n[cum] = 1; 3249 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3250 pcbddc->adaptive_constraints_data[cum] = 1.0; 3251 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3252 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3253 } 3254 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3255 } 3256 3257 if (mss) { /* multilevel */ 3258 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3259 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3260 } 3261 3262 lthresh = pcbddc->adaptive_threshold[0]; 3263 uthresh = pcbddc->adaptive_threshold[1]; 3264 for (i=0;i<sub_schurs->n_subs;i++) { 3265 const PetscInt *idxs; 3266 PetscReal upper,lower; 3267 PetscInt j,subset_size,eigs_start = 0; 3268 PetscBLASInt B_N; 3269 PetscBool same_data = PETSC_FALSE; 3270 PetscBool scal = PETSC_FALSE; 3271 3272 if (pcbddc->use_deluxe_scaling) { 3273 upper = PETSC_MAX_REAL; 3274 lower = uthresh; 3275 } else { 3276 if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3277 upper = 1./uthresh; 3278 lower = 0.; 3279 } 3280 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3281 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3282 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3283 /* this is experimental: we assume the dofs have been properly grouped to have 3284 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3285 if (!sub_schurs->is_posdef) { 3286 Mat T; 3287 3288 for (j=0;j<subset_size;j++) { 3289 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3290 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);CHKERRQ(ierr); 3291 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3292 ierr = MatDestroy(&T);CHKERRQ(ierr); 3293 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);CHKERRQ(ierr); 3294 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3295 ierr = MatDestroy(&T);CHKERRQ(ierr); 3296 if (sub_schurs->change_primal_sub) { 3297 PetscInt nz,k; 3298 const PetscInt *idxs; 3299 3300 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);CHKERRQ(ierr); 3301 ierr = ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3302 for (k=0;k<nz;k++) { 3303 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3304 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3305 } 3306 ierr = ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3307 } 3308 scal = PETSC_TRUE; 3309 break; 3310 } 3311 } 3312 } 3313 3314 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3315 if (sub_schurs->is_symmetric) { 3316 PetscInt j,k; 3317 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3318 ierr = PetscArrayzero(S,subset_size*subset_size);CHKERRQ(ierr); 3319 ierr = PetscArrayzero(St,subset_size*subset_size);CHKERRQ(ierr); 3320 } 3321 for (j=0;j<subset_size;j++) { 3322 for (k=j;k<subset_size;k++) { 3323 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3324 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3325 } 3326 } 3327 } else { 3328 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3329 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3330 } 3331 } else { 3332 S = Sarray + cumarray; 3333 St = Starray + cumarray; 3334 } 3335 /* see if we can save some work */ 3336 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3337 ierr = PetscArraycmp(S,St,subset_size*subset_size,&same_data);CHKERRQ(ierr); 3338 } 3339 3340 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3341 B_neigs = 0; 3342 } else { 3343 if (sub_schurs->is_symmetric) { 3344 PetscBLASInt B_itype = 1; 3345 PetscBLASInt B_IL, B_IU; 3346 PetscReal eps = -1.0; /* dlamch? */ 3347 PetscInt nmin_s; 3348 PetscBool compute_range; 3349 3350 B_neigs = 0; 3351 compute_range = (PetscBool)!same_data; 3352 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3353 3354 if (pcbddc->dbg_flag) { 3355 PetscInt nc = 0; 3356 3357 if (sub_schurs->change_primal_sub) { 3358 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);CHKERRQ(ierr); 3359 } 3360 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);CHKERRQ(ierr); 3361 } 3362 3363 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3364 if (compute_range) { 3365 3366 /* ask for eigenvalues larger than thresh */ 3367 if (sub_schurs->is_posdef) { 3368 #if defined(PETSC_USE_COMPLEX) 3369 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3370 #else 3371 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3372 #endif 3373 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3374 } else { /* no theory so far, but it works nicely */ 3375 PetscInt recipe = 0,recipe_m = 1; 3376 PetscReal bb[2]; 3377 3378 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);CHKERRQ(ierr); 3379 switch (recipe) { 3380 case 0: 3381 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3382 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3383 #if defined(PETSC_USE_COMPLEX) 3384 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3385 #else 3386 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3387 #endif 3388 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3389 break; 3390 case 1: 3391 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3392 #if defined(PETSC_USE_COMPLEX) 3393 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3394 #else 3395 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3396 #endif 3397 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3398 if (!scal) { 3399 PetscBLASInt B_neigs2 = 0; 3400 3401 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3402 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3403 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3404 #if defined(PETSC_USE_COMPLEX) 3405 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3406 #else 3407 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3408 #endif 3409 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3410 B_neigs += B_neigs2; 3411 } 3412 break; 3413 case 2: 3414 if (scal) { 3415 bb[0] = PETSC_MIN_REAL; 3416 bb[1] = 0; 3417 #if defined(PETSC_USE_COMPLEX) 3418 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3419 #else 3420 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3421 #endif 3422 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3423 } else { 3424 PetscBLASInt B_neigs2 = 0; 3425 PetscBool import = PETSC_FALSE; 3426 3427 lthresh = PetscMax(lthresh,0.0); 3428 if (lthresh > 0.0) { 3429 bb[0] = PETSC_MIN_REAL; 3430 bb[1] = lthresh*lthresh; 3431 3432 import = PETSC_TRUE; 3433 #if defined(PETSC_USE_COMPLEX) 3434 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3435 #else 3436 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3437 #endif 3438 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3439 } 3440 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3441 bb[1] = PETSC_MAX_REAL; 3442 if (import) { 3443 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3444 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3445 } 3446 #if defined(PETSC_USE_COMPLEX) 3447 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3448 #else 3449 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3450 #endif 3451 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3452 B_neigs += B_neigs2; 3453 } 3454 break; 3455 case 3: 3456 if (scal) { 3457 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);CHKERRQ(ierr); 3458 } else { 3459 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);CHKERRQ(ierr); 3460 } 3461 if (!scal) { 3462 bb[0] = uthresh; 3463 bb[1] = PETSC_MAX_REAL; 3464 #if defined(PETSC_USE_COMPLEX) 3465 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3466 #else 3467 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3468 #endif 3469 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3470 } 3471 if (recipe_m > 0 && B_N - B_neigs > 0) { 3472 PetscBLASInt B_neigs2 = 0; 3473 3474 B_IL = 1; 3475 ierr = PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);CHKERRQ(ierr); 3476 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3477 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3478 #if defined(PETSC_USE_COMPLEX) 3479 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3480 #else 3481 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3482 #endif 3483 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3484 B_neigs += B_neigs2; 3485 } 3486 break; 3487 case 4: 3488 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3489 #if defined(PETSC_USE_COMPLEX) 3490 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3491 #else 3492 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3493 #endif 3494 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3495 { 3496 PetscBLASInt B_neigs2 = 0; 3497 3498 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3499 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3500 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3501 #if defined(PETSC_USE_COMPLEX) 3502 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3503 #else 3504 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3505 #endif 3506 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3507 B_neigs += B_neigs2; 3508 } 3509 break; 3510 case 5: /* same as before: first compute all eigenvalues, then filter */ 3511 #if defined(PETSC_USE_COMPLEX) 3512 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3513 #else 3514 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3515 #endif 3516 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3517 { 3518 PetscInt e,k,ne; 3519 for (e=0,ne=0;e<B_neigs;e++) { 3520 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3521 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3522 eigs[ne] = eigs[e]; 3523 ne++; 3524 } 3525 } 3526 ierr = PetscArraycpy(eigv,S,B_N*ne);CHKERRQ(ierr); 3527 B_neigs = ne; 3528 } 3529 break; 3530 default: 3531 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3532 break; 3533 } 3534 } 3535 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3536 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3537 B_IL = 1; 3538 #if defined(PETSC_USE_COMPLEX) 3539 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3540 #else 3541 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3542 #endif 3543 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3544 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3545 PetscInt k; 3546 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3547 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3548 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3549 nmin = nmax; 3550 ierr = PetscArrayzero(eigv,subset_size*nmax);CHKERRQ(ierr); 3551 for (k=0;k<nmax;k++) { 3552 eigs[k] = 1./PETSC_SMALL; 3553 eigv[k*(subset_size+1)] = 1.0; 3554 } 3555 } 3556 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3557 if (B_ierr) { 3558 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3559 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3560 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3561 } 3562 3563 if (B_neigs > nmax) { 3564 if (pcbddc->dbg_flag) { 3565 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax);CHKERRQ(ierr); 3566 } 3567 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3568 B_neigs = nmax; 3569 } 3570 3571 nmin_s = PetscMin(nmin,B_N); 3572 if (B_neigs < nmin_s) { 3573 PetscBLASInt B_neigs2 = 0; 3574 3575 if (pcbddc->use_deluxe_scaling) { 3576 if (scal) { 3577 B_IU = nmin_s; 3578 B_IL = B_neigs + 1; 3579 } else { 3580 B_IL = B_N - nmin_s + 1; 3581 B_IU = B_N - B_neigs; 3582 } 3583 } else { 3584 B_IL = B_neigs + 1; 3585 B_IU = nmin_s; 3586 } 3587 if (pcbddc->dbg_flag) { 3588 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);CHKERRQ(ierr); 3589 } 3590 if (sub_schurs->is_symmetric) { 3591 PetscInt j,k; 3592 for (j=0;j<subset_size;j++) { 3593 for (k=j;k<subset_size;k++) { 3594 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3595 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3596 } 3597 } 3598 } else { 3599 ierr = PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3600 ierr = PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);CHKERRQ(ierr); 3601 } 3602 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3603 #if defined(PETSC_USE_COMPLEX) 3604 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3605 #else 3606 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3607 #endif 3608 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3609 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3610 B_neigs += B_neigs2; 3611 } 3612 if (B_ierr) { 3613 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3614 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3615 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3616 } 3617 if (pcbddc->dbg_flag) { 3618 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3619 for (j=0;j<B_neigs;j++) { 3620 if (eigs[j] == 0.0) { 3621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3622 } else { 3623 if (pcbddc->use_deluxe_scaling) { 3624 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3625 } else { 3626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3627 } 3628 } 3629 } 3630 } 3631 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3632 } 3633 /* change the basis back to the original one */ 3634 if (sub_schurs->change) { 3635 Mat change,phi,phit; 3636 3637 if (pcbddc->dbg_flag > 2) { 3638 PetscInt ii; 3639 for (ii=0;ii<B_neigs;ii++) { 3640 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3641 for (j=0;j<B_N;j++) { 3642 #if defined(PETSC_USE_COMPLEX) 3643 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3644 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3645 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3646 #else 3647 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3648 #endif 3649 } 3650 } 3651 } 3652 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3653 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3654 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3655 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3656 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3657 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3658 } 3659 maxneigs = PetscMax(B_neigs,maxneigs); 3660 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3661 if (B_neigs) { 3662 ierr = PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size);CHKERRQ(ierr); 3663 3664 if (pcbddc->dbg_flag > 1) { 3665 PetscInt ii; 3666 for (ii=0;ii<B_neigs;ii++) { 3667 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3668 for (j=0;j<B_N;j++) { 3669 #if defined(PETSC_USE_COMPLEX) 3670 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3671 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3672 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3673 #else 3674 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3675 #endif 3676 } 3677 } 3678 } 3679 ierr = PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size);CHKERRQ(ierr); 3680 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3681 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3682 cum++; 3683 } 3684 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3685 /* shift for next computation */ 3686 cumarray += subset_size*subset_size; 3687 } 3688 if (pcbddc->dbg_flag) { 3689 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3690 } 3691 3692 if (mss) { 3693 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3694 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3695 /* destroy matrices (junk) */ 3696 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3697 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3698 } 3699 if (allocated_S_St) { 3700 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3701 } 3702 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3703 #if defined(PETSC_USE_COMPLEX) 3704 ierr = PetscFree(rwork);CHKERRQ(ierr); 3705 #endif 3706 if (pcbddc->dbg_flag) { 3707 PetscInt maxneigs_r; 3708 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3709 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);CHKERRQ(ierr); 3710 } 3711 ierr = PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3712 PetscFunctionReturn(0); 3713 } 3714 3715 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3716 { 3717 PetscScalar *coarse_submat_vals; 3718 PetscErrorCode ierr; 3719 3720 PetscFunctionBegin; 3721 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3722 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3723 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3724 3725 /* Setup local neumann solver ksp_R */ 3726 /* PCBDDCSetUpLocalScatters should be called first! */ 3727 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3728 3729 /* 3730 Setup local correction and local part of coarse basis. 3731 Gives back the dense local part of the coarse matrix in column major ordering 3732 */ 3733 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3734 3735 /* Compute total number of coarse nodes and setup coarse solver */ 3736 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3737 3738 /* free */ 3739 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3740 PetscFunctionReturn(0); 3741 } 3742 3743 PetscErrorCode PCBDDCResetCustomization(PC pc) 3744 { 3745 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3746 PetscErrorCode ierr; 3747 3748 PetscFunctionBegin; 3749 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3750 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3751 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3752 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3753 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3754 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3755 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3756 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3757 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3758 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3759 PetscFunctionReturn(0); 3760 } 3761 3762 PetscErrorCode PCBDDCResetTopography(PC pc) 3763 { 3764 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3765 PetscInt i; 3766 PetscErrorCode ierr; 3767 3768 PetscFunctionBegin; 3769 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3770 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3771 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3772 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3773 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3774 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3775 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3776 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3777 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3778 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3779 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3780 for (i=0;i<pcbddc->n_local_subs;i++) { 3781 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3782 } 3783 pcbddc->n_local_subs = 0; 3784 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3785 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3786 pcbddc->graphanalyzed = PETSC_FALSE; 3787 pcbddc->recompute_topography = PETSC_TRUE; 3788 pcbddc->corner_selected = PETSC_FALSE; 3789 PetscFunctionReturn(0); 3790 } 3791 3792 PetscErrorCode PCBDDCResetSolvers(PC pc) 3793 { 3794 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3795 PetscErrorCode ierr; 3796 3797 PetscFunctionBegin; 3798 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3799 if (pcbddc->coarse_phi_B) { 3800 PetscScalar *array; 3801 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3802 ierr = PetscFree(array);CHKERRQ(ierr); 3803 } 3804 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3805 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3806 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3807 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3808 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3809 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3810 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3811 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3812 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3813 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3814 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3815 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3816 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3817 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3818 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3819 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3820 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3821 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3822 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3823 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3824 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3825 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3826 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3827 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3828 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3829 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3830 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3831 if (pcbddc->benign_zerodiag_subs) { 3832 PetscInt i; 3833 for (i=0;i<pcbddc->benign_n;i++) { 3834 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3835 } 3836 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3837 } 3838 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3839 PetscFunctionReturn(0); 3840 } 3841 3842 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3843 { 3844 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3845 PC_IS *pcis = (PC_IS*)pc->data; 3846 VecType impVecType; 3847 PetscInt n_constraints,n_R,old_size; 3848 PetscErrorCode ierr; 3849 3850 PetscFunctionBegin; 3851 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3852 n_R = pcis->n - pcbddc->n_vertices; 3853 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3854 /* local work vectors (try to avoid unneeded work)*/ 3855 /* R nodes */ 3856 old_size = -1; 3857 if (pcbddc->vec1_R) { 3858 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3859 } 3860 if (n_R != old_size) { 3861 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3862 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3863 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3864 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3865 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3866 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3867 } 3868 /* local primal dofs */ 3869 old_size = -1; 3870 if (pcbddc->vec1_P) { 3871 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3872 } 3873 if (pcbddc->local_primal_size != old_size) { 3874 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3875 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3876 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3877 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3878 } 3879 /* local explicit constraints */ 3880 old_size = -1; 3881 if (pcbddc->vec1_C) { 3882 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3883 } 3884 if (n_constraints && n_constraints != old_size) { 3885 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3886 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3887 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3888 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3889 } 3890 PetscFunctionReturn(0); 3891 } 3892 3893 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3894 { 3895 PetscErrorCode ierr; 3896 /* pointers to pcis and pcbddc */ 3897 PC_IS* pcis = (PC_IS*)pc->data; 3898 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3899 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3900 /* submatrices of local problem */ 3901 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3902 /* submatrices of local coarse problem */ 3903 Mat S_VV,S_CV,S_VC,S_CC; 3904 /* working matrices */ 3905 Mat C_CR; 3906 /* additional working stuff */ 3907 PC pc_R; 3908 Mat F,Brhs = NULL; 3909 Vec dummy_vec; 3910 PetscBool isLU,isCHOL,need_benign_correction,sparserhs; 3911 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3912 PetscScalar *work; 3913 PetscInt *idx_V_B; 3914 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3915 PetscInt i,n_R,n_D,n_B; 3916 PetscScalar one=1.0,m_one=-1.0; 3917 3918 PetscFunctionBegin; 3919 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3920 ierr = PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3921 3922 /* Set Non-overlapping dimensions */ 3923 n_vertices = pcbddc->n_vertices; 3924 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3925 n_B = pcis->n_B; 3926 n_D = pcis->n - n_B; 3927 n_R = pcis->n - n_vertices; 3928 3929 /* vertices in boundary numbering */ 3930 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3931 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3932 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i); 3933 3934 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3935 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3936 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3937 ierr = MatDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3938 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3939 ierr = MatDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3940 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3941 ierr = MatDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3942 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3943 ierr = MatDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3944 3945 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3946 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3947 ierr = PCSetUp(pc_R);CHKERRQ(ierr); 3948 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3949 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3950 lda_rhs = n_R; 3951 need_benign_correction = PETSC_FALSE; 3952 if (isLU || isCHOL) { 3953 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3954 } else if (sub_schurs && sub_schurs->reuse_solver) { 3955 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3956 MatFactorType type; 3957 3958 F = reuse_solver->F; 3959 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3960 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3961 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3962 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3963 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3964 } else F = NULL; 3965 3966 /* determine if we can use a sparse right-hand side */ 3967 sparserhs = PETSC_FALSE; 3968 if (F) { 3969 MatSolverType solver; 3970 3971 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3972 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3973 } 3974 3975 /* allocate workspace */ 3976 n = 0; 3977 if (n_constraints) { 3978 n += lda_rhs*n_constraints; 3979 } 3980 if (n_vertices) { 3981 n = PetscMax(2*lda_rhs*n_vertices,n); 3982 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3983 } 3984 if (!pcbddc->symmetric_primal) { 3985 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3986 } 3987 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3988 3989 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3990 dummy_vec = NULL; 3991 if (need_benign_correction && lda_rhs != n_R && F) { 3992 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);CHKERRQ(ierr); 3993 ierr = VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);CHKERRQ(ierr); 3994 ierr = VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);CHKERRQ(ierr); 3995 } 3996 3997 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3998 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3999 4000 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 4001 if (n_constraints) { 4002 Mat M3,C_B; 4003 IS is_aux; 4004 PetscScalar *array,*array2; 4005 4006 /* Extract constraints on R nodes: C_{CR} */ 4007 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 4008 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 4009 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4010 4011 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4012 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4013 if (!sparserhs) { 4014 ierr = PetscArrayzero(work,lda_rhs*n_constraints);CHKERRQ(ierr); 4015 for (i=0;i<n_constraints;i++) { 4016 const PetscScalar *row_cmat_values; 4017 const PetscInt *row_cmat_indices; 4018 PetscInt size_of_constraint,j; 4019 4020 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 4021 for (j=0;j<size_of_constraint;j++) { 4022 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 4023 } 4024 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 4025 } 4026 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 4027 } else { 4028 Mat tC_CR; 4029 4030 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 4031 if (lda_rhs != n_R) { 4032 PetscScalar *aa; 4033 PetscInt r,*ii,*jj; 4034 PetscBool done; 4035 4036 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4037 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4038 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 4039 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 4040 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4041 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4042 } else { 4043 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 4044 tC_CR = C_CR; 4045 } 4046 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 4047 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 4048 } 4049 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 4050 if (F) { 4051 if (need_benign_correction) { 4052 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4053 4054 /* rhs is already zero on interior dofs, no need to change the rhs */ 4055 ierr = PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n);CHKERRQ(ierr); 4056 } 4057 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 4058 if (need_benign_correction) { 4059 PetscScalar *marr; 4060 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4061 4062 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4063 if (lda_rhs != n_R) { 4064 for (i=0;i<n_constraints;i++) { 4065 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4066 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4067 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4068 } 4069 } else { 4070 for (i=0;i<n_constraints;i++) { 4071 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4072 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4073 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4074 } 4075 } 4076 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4077 } 4078 } else { 4079 PetscScalar *marr; 4080 4081 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4082 for (i=0;i<n_constraints;i++) { 4083 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4084 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 4085 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4086 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4087 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4088 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4089 } 4090 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4091 } 4092 if (sparserhs) { 4093 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 4094 } 4095 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4096 if (!pcbddc->switch_static) { 4097 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4098 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 4099 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 4100 for (i=0;i<n_constraints;i++) { 4101 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 4102 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 4103 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4104 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4105 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4106 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4107 } 4108 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 4109 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 4110 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4111 } else { 4112 if (lda_rhs != n_R) { 4113 IS dummy; 4114 4115 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 4116 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4117 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 4118 } else { 4119 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 4120 pcbddc->local_auxmat2 = local_auxmat2_R; 4121 } 4122 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4123 } 4124 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4125 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 4126 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 4127 if (isCHOL) { 4128 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 4129 } else { 4130 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 4131 } 4132 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 4133 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4134 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4135 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4136 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4137 ierr = MatDestroy(&M3);CHKERRQ(ierr); 4138 } 4139 4140 /* Get submatrices from subdomain matrix */ 4141 if (n_vertices) { 4142 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4143 PetscBool oldpin; 4144 #endif 4145 PetscBool isaij; 4146 IS is_aux; 4147 4148 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4149 IS tis; 4150 4151 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 4152 ierr = ISSort(tis);CHKERRQ(ierr); 4153 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 4154 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4155 } else { 4156 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 4157 } 4158 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4159 oldpin = pcbddc->local_mat->boundtocpu; 4160 #endif 4161 ierr = MatBindToCPU(pcbddc->local_mat,PETSC_TRUE);CHKERRQ(ierr); 4162 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 4163 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 4164 ierr = PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij);CHKERRQ(ierr); 4165 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4166 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4167 } 4168 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 4169 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4170 ierr = MatBindToCPU(pcbddc->local_mat,oldpin);CHKERRQ(ierr); 4171 #endif 4172 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4173 } 4174 4175 /* Matrix of coarse basis functions (local) */ 4176 if (pcbddc->coarse_phi_B) { 4177 PetscInt on_B,on_primal,on_D=n_D; 4178 if (pcbddc->coarse_phi_D) { 4179 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 4180 } 4181 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 4182 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4183 PetscScalar *marray; 4184 4185 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 4186 ierr = PetscFree(marray);CHKERRQ(ierr); 4187 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4188 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4189 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4190 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4191 } 4192 } 4193 4194 if (!pcbddc->coarse_phi_B) { 4195 PetscScalar *marr; 4196 4197 /* memory size */ 4198 n = n_B*pcbddc->local_primal_size; 4199 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4200 if (!pcbddc->symmetric_primal) n *= 2; 4201 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 4202 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4203 marr += n_B*pcbddc->local_primal_size; 4204 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4205 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4206 marr += n_D*pcbddc->local_primal_size; 4207 } 4208 if (!pcbddc->symmetric_primal) { 4209 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4210 marr += n_B*pcbddc->local_primal_size; 4211 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4212 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4213 } 4214 } else { 4215 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 4216 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4217 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4218 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 4219 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4220 } 4221 } 4222 } 4223 4224 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4225 p0_lidx_I = NULL; 4226 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4227 const PetscInt *idxs; 4228 4229 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4230 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 4231 for (i=0;i<pcbddc->benign_n;i++) { 4232 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 4233 } 4234 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4235 } 4236 4237 /* vertices */ 4238 if (n_vertices) { 4239 PetscBool restoreavr = PETSC_FALSE; 4240 4241 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 4242 4243 if (n_R) { 4244 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4245 PetscBLASInt B_N,B_one = 1; 4246 const PetscScalar *x; 4247 PetscScalar *y; 4248 4249 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 4250 if (need_benign_correction) { 4251 ISLocalToGlobalMapping RtoN; 4252 IS is_p0; 4253 PetscInt *idxs_p0,n; 4254 4255 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 4256 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 4257 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 4258 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n); 4259 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 4260 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 4261 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 4262 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 4263 } 4264 4265 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4266 if (!sparserhs || need_benign_correction) { 4267 if (lda_rhs == n_R) { 4268 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4269 } else { 4270 PetscScalar *av,*array; 4271 const PetscInt *xadj,*adjncy; 4272 PetscInt n; 4273 PetscBool flg_row; 4274 4275 array = work+lda_rhs*n_vertices; 4276 ierr = PetscArrayzero(array,lda_rhs*n_vertices);CHKERRQ(ierr); 4277 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4278 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4279 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 4280 for (i=0;i<n;i++) { 4281 PetscInt j; 4282 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4283 } 4284 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4285 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4286 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 4287 } 4288 if (need_benign_correction) { 4289 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4290 PetscScalar *marr; 4291 4292 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 4293 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4294 4295 | 0 0 0 | (V) 4296 L = | 0 0 -1 | (P-p0) 4297 | 0 0 -1 | (p0) 4298 4299 */ 4300 for (i=0;i<reuse_solver->benign_n;i++) { 4301 const PetscScalar *vals; 4302 const PetscInt *idxs,*idxs_zero; 4303 PetscInt n,j,nz; 4304 4305 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4306 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4307 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4308 for (j=0;j<n;j++) { 4309 PetscScalar val = vals[j]; 4310 PetscInt k,col = idxs[j]; 4311 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4312 } 4313 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4314 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4315 } 4316 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 4317 } 4318 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 4319 Brhs = A_RV; 4320 } else { 4321 Mat tA_RVT,A_RVT; 4322 4323 if (!pcbddc->symmetric_primal) { 4324 /* A_RV already scaled by -1 */ 4325 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 4326 } else { 4327 restoreavr = PETSC_TRUE; 4328 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4329 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 4330 A_RVT = A_VR; 4331 } 4332 if (lda_rhs != n_R) { 4333 PetscScalar *aa; 4334 PetscInt r,*ii,*jj; 4335 PetscBool done; 4336 4337 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4338 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4339 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4340 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4341 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4342 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4343 } else { 4344 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4345 tA_RVT = A_RVT; 4346 } 4347 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4348 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4349 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4350 } 4351 if (F) { 4352 /* need to correct the rhs */ 4353 if (need_benign_correction) { 4354 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4355 PetscScalar *marr; 4356 4357 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4358 if (lda_rhs != n_R) { 4359 for (i=0;i<n_vertices;i++) { 4360 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4361 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4362 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4363 } 4364 } else { 4365 for (i=0;i<n_vertices;i++) { 4366 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4367 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4368 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4369 } 4370 } 4371 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4372 } 4373 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4374 if (restoreavr) { 4375 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4376 } 4377 /* need to correct the solution */ 4378 if (need_benign_correction) { 4379 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4380 PetscScalar *marr; 4381 4382 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4383 if (lda_rhs != n_R) { 4384 for (i=0;i<n_vertices;i++) { 4385 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4386 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4387 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4388 } 4389 } else { 4390 for (i=0;i<n_vertices;i++) { 4391 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4392 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4393 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4394 } 4395 } 4396 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4397 } 4398 } else { 4399 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4400 for (i=0;i<n_vertices;i++) { 4401 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4402 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4403 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4404 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4405 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4406 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4407 } 4408 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4409 } 4410 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4411 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4412 /* S_VV and S_CV */ 4413 if (n_constraints) { 4414 Mat B; 4415 4416 ierr = PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices);CHKERRQ(ierr); 4417 for (i=0;i<n_vertices;i++) { 4418 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4419 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4420 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4421 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4422 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4423 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4424 } 4425 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4426 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4427 ierr = MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV);CHKERRQ(ierr); 4428 ierr = MatProductSetType(S_CV,MATPRODUCT_AB);CHKERRQ(ierr); 4429 ierr = MatProductSetFromOptions(S_CV);CHKERRQ(ierr); 4430 ierr = MatProductSymbolic(S_CV);CHKERRQ(ierr); 4431 ierr = MatProductNumeric(S_CV);CHKERRQ(ierr); 4432 ierr = MatProductClear(S_CV);CHKERRQ(ierr); 4433 4434 ierr = MatDestroy(&B);CHKERRQ(ierr); 4435 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4436 /* Reuse B = local_auxmat2_R * S_CV */ 4437 ierr = MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B);CHKERRQ(ierr); 4438 ierr = MatProductSetType(B,MATPRODUCT_AB);CHKERRQ(ierr); 4439 ierr = MatProductSetFromOptions(B);CHKERRQ(ierr); 4440 ierr = MatProductSymbolic(B);CHKERRQ(ierr); 4441 ierr = MatProductNumeric(B);CHKERRQ(ierr); 4442 4443 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4444 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4445 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4446 ierr = MatDestroy(&B);CHKERRQ(ierr); 4447 } 4448 if (lda_rhs != n_R) { 4449 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4450 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4451 ierr = MatDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4452 } 4453 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4454 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4455 if (need_benign_correction) { 4456 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4457 PetscScalar *marr,*sums; 4458 4459 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4460 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4461 for (i=0;i<reuse_solver->benign_n;i++) { 4462 const PetscScalar *vals; 4463 const PetscInt *idxs,*idxs_zero; 4464 PetscInt n,j,nz; 4465 4466 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4467 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4468 for (j=0;j<n_vertices;j++) { 4469 PetscInt k; 4470 sums[j] = 0.; 4471 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4472 } 4473 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4474 for (j=0;j<n;j++) { 4475 PetscScalar val = vals[j]; 4476 PetscInt k; 4477 for (k=0;k<n_vertices;k++) { 4478 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4479 } 4480 } 4481 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4482 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4483 } 4484 ierr = PetscFree(sums);CHKERRQ(ierr); 4485 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4486 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4487 } 4488 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4489 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4490 ierr = MatDenseGetArrayRead(A_VV,&x);CHKERRQ(ierr); 4491 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4492 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4493 ierr = MatDenseRestoreArrayRead(A_VV,&x);CHKERRQ(ierr); 4494 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4495 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4496 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4497 } else { 4498 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4499 } 4500 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4501 4502 /* coarse basis functions */ 4503 for (i=0;i<n_vertices;i++) { 4504 PetscScalar *y; 4505 4506 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4507 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4508 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4509 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4510 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4511 y[n_B*i+idx_V_B[i]] = 1.0; 4512 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4513 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4514 4515 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4516 PetscInt j; 4517 4518 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4519 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4520 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4521 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4522 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4523 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4524 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4525 } 4526 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4527 } 4528 /* if n_R == 0 the object is not destroyed */ 4529 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4530 } 4531 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4532 4533 if (n_constraints) { 4534 Mat B; 4535 4536 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4537 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4538 ierr = MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B);CHKERRQ(ierr); 4539 ierr = MatProductSetType(B,MATPRODUCT_AB);CHKERRQ(ierr); 4540 ierr = MatProductSetFromOptions(B);CHKERRQ(ierr); 4541 ierr = MatProductSymbolic(B);CHKERRQ(ierr); 4542 ierr = MatProductNumeric(B);CHKERRQ(ierr); 4543 4544 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4545 if (n_vertices) { 4546 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4547 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4548 } else { 4549 Mat S_VCt; 4550 4551 if (lda_rhs != n_R) { 4552 ierr = MatDestroy(&B);CHKERRQ(ierr); 4553 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4554 ierr = MatDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4555 } 4556 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4557 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4558 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4559 } 4560 } 4561 ierr = MatDestroy(&B);CHKERRQ(ierr); 4562 /* coarse basis functions */ 4563 for (i=0;i<n_constraints;i++) { 4564 PetscScalar *y; 4565 4566 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4567 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4568 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4569 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4570 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4571 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4572 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4573 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4574 PetscInt j; 4575 4576 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4577 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4578 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4579 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4580 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4581 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4582 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4583 } 4584 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4585 } 4586 } 4587 if (n_constraints) { 4588 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4589 } 4590 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4591 4592 /* coarse matrix entries relative to B_0 */ 4593 if (pcbddc->benign_n) { 4594 Mat B0_B,B0_BPHI; 4595 IS is_dummy; 4596 const PetscScalar *data; 4597 PetscInt j; 4598 4599 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4600 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4601 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4602 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4603 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4604 ierr = MatDenseGetArrayRead(B0_BPHI,&data);CHKERRQ(ierr); 4605 for (j=0;j<pcbddc->benign_n;j++) { 4606 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4607 for (i=0;i<pcbddc->local_primal_size;i++) { 4608 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4609 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4610 } 4611 } 4612 ierr = MatDenseRestoreArrayRead(B0_BPHI,&data);CHKERRQ(ierr); 4613 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4614 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4615 } 4616 4617 /* compute other basis functions for non-symmetric problems */ 4618 if (!pcbddc->symmetric_primal) { 4619 Mat B_V=NULL,B_C=NULL; 4620 PetscScalar *marray; 4621 4622 if (n_constraints) { 4623 Mat S_CCT,C_CRT; 4624 4625 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4626 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4627 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4628 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4629 if (n_vertices) { 4630 Mat S_VCT; 4631 4632 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4633 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4634 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4635 } 4636 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4637 } else { 4638 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4639 } 4640 if (n_vertices && n_R) { 4641 PetscScalar *av,*marray; 4642 const PetscInt *xadj,*adjncy; 4643 PetscInt n; 4644 PetscBool flg_row; 4645 4646 /* B_V = B_V - A_VR^T */ 4647 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4648 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4649 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4650 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4651 for (i=0;i<n;i++) { 4652 PetscInt j; 4653 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4654 } 4655 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4656 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4657 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4658 } 4659 4660 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4661 if (n_vertices) { 4662 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4663 for (i=0;i<n_vertices;i++) { 4664 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4665 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4666 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4667 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4668 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4669 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4670 } 4671 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4672 } 4673 if (B_C) { 4674 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4675 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4676 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4677 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4678 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4679 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4680 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4681 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4682 } 4683 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4684 } 4685 /* coarse basis functions */ 4686 for (i=0;i<pcbddc->local_primal_size;i++) { 4687 PetscScalar *y; 4688 4689 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4690 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4691 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4692 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4693 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4694 if (i<n_vertices) { 4695 y[n_B*i+idx_V_B[i]] = 1.0; 4696 } 4697 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4698 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4699 4700 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4701 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4702 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4703 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4704 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4705 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4706 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4707 } 4708 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4709 } 4710 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4711 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4712 } 4713 4714 /* free memory */ 4715 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4716 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4717 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4718 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4719 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4720 ierr = PetscFree(work);CHKERRQ(ierr); 4721 if (n_vertices) { 4722 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4723 } 4724 if (n_constraints) { 4725 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4726 } 4727 ierr = PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 4728 4729 /* Checking coarse_sub_mat and coarse basis functios */ 4730 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4731 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4732 if (pcbddc->dbg_flag) { 4733 Mat coarse_sub_mat; 4734 Mat AUXMAT,TM1,TM2,TM3,TM4; 4735 Mat coarse_phi_D,coarse_phi_B; 4736 Mat coarse_psi_D,coarse_psi_B; 4737 Mat A_II,A_BB,A_IB,A_BI; 4738 Mat C_B,CPHI; 4739 IS is_dummy; 4740 Vec mones; 4741 MatType checkmattype=MATSEQAIJ; 4742 PetscReal real_value; 4743 4744 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4745 Mat A; 4746 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4747 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4748 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4749 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4750 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4751 ierr = MatDestroy(&A);CHKERRQ(ierr); 4752 } else { 4753 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4754 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4755 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4756 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4757 } 4758 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4759 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4760 if (!pcbddc->symmetric_primal) { 4761 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4762 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4763 } 4764 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4765 4766 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4767 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4768 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4769 if (!pcbddc->symmetric_primal) { 4770 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4771 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4772 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4773 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4774 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4775 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4776 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4777 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4778 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4779 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4780 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4781 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4782 } else { 4783 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4784 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4785 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4786 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4787 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4788 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4789 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4790 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4791 } 4792 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4793 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4794 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4795 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4796 if (pcbddc->benign_n) { 4797 Mat B0_B,B0_BPHI; 4798 const PetscScalar *data2; 4799 PetscScalar *data; 4800 PetscInt j; 4801 4802 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4803 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4804 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4805 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4806 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4807 ierr = MatDenseGetArrayRead(B0_BPHI,&data2);CHKERRQ(ierr); 4808 for (j=0;j<pcbddc->benign_n;j++) { 4809 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4810 for (i=0;i<pcbddc->local_primal_size;i++) { 4811 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4812 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4813 } 4814 } 4815 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4816 ierr = MatDenseRestoreArrayRead(B0_BPHI,&data2);CHKERRQ(ierr); 4817 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4818 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4819 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4820 } 4821 #if 0 4822 { 4823 PetscViewer viewer; 4824 char filename[256]; 4825 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4826 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4827 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4828 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4829 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4830 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4831 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4832 if (pcbddc->coarse_phi_B) { 4833 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4834 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4835 } 4836 if (pcbddc->coarse_phi_D) { 4837 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4838 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4839 } 4840 if (pcbddc->coarse_psi_B) { 4841 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4842 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4843 } 4844 if (pcbddc->coarse_psi_D) { 4845 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4846 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4847 } 4848 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4849 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4850 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4851 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4852 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4853 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4854 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4855 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4856 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4857 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4858 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4859 } 4860 #endif 4861 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4862 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4863 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4864 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4865 4866 /* check constraints */ 4867 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4868 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4869 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4870 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4871 } else { 4872 PetscScalar *data; 4873 Mat tmat; 4874 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4875 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4876 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4877 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4878 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4879 } 4880 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4881 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4882 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4883 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4884 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4885 if (!pcbddc->symmetric_primal) { 4886 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4887 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4888 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4889 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4890 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4891 } 4892 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4893 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4894 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4895 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4896 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4897 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4898 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4899 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4900 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4901 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4902 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4903 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4904 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4905 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4906 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4907 if (!pcbddc->symmetric_primal) { 4908 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4909 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4910 } 4911 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4912 } 4913 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4914 { 4915 PetscBool gpu; 4916 4917 ierr = PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu);CHKERRQ(ierr); 4918 if (gpu) { 4919 if (pcbddc->local_auxmat1) { 4920 ierr = MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4921 } 4922 if (pcbddc->local_auxmat2) { 4923 ierr = MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4924 } 4925 if (pcbddc->coarse_phi_B) { 4926 ierr = MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4927 } 4928 if (pcbddc->coarse_phi_D) { 4929 ierr = MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4930 } 4931 if (pcbddc->coarse_psi_B) { 4932 ierr = MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4933 } 4934 if (pcbddc->coarse_psi_D) { 4935 ierr = MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4936 } 4937 } 4938 } 4939 /* get back data */ 4940 *coarse_submat_vals_n = coarse_submat_vals; 4941 PetscFunctionReturn(0); 4942 } 4943 4944 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4945 { 4946 Mat *work_mat; 4947 IS isrow_s,iscol_s; 4948 PetscBool rsorted,csorted; 4949 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4950 PetscErrorCode ierr; 4951 4952 PetscFunctionBegin; 4953 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4954 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4955 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4956 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4957 4958 if (!rsorted) { 4959 const PetscInt *idxs; 4960 PetscInt *idxs_sorted,i; 4961 4962 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4963 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4964 for (i=0;i<rsize;i++) { 4965 idxs_perm_r[i] = i; 4966 } 4967 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4968 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4969 for (i=0;i<rsize;i++) { 4970 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4971 } 4972 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4973 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4974 } else { 4975 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4976 isrow_s = isrow; 4977 } 4978 4979 if (!csorted) { 4980 if (isrow == iscol) { 4981 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4982 iscol_s = isrow_s; 4983 } else { 4984 const PetscInt *idxs; 4985 PetscInt *idxs_sorted,i; 4986 4987 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4988 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4989 for (i=0;i<csize;i++) { 4990 idxs_perm_c[i] = i; 4991 } 4992 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4993 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4994 for (i=0;i<csize;i++) { 4995 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4996 } 4997 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4998 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4999 } 5000 } else { 5001 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 5002 iscol_s = iscol; 5003 } 5004 5005 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5006 5007 if (!rsorted || !csorted) { 5008 Mat new_mat; 5009 IS is_perm_r,is_perm_c; 5010 5011 if (!rsorted) { 5012 PetscInt *idxs_r,i; 5013 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 5014 for (i=0;i<rsize;i++) { 5015 idxs_r[idxs_perm_r[i]] = i; 5016 } 5017 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 5018 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 5019 } else { 5020 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 5021 } 5022 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 5023 5024 if (!csorted) { 5025 if (isrow_s == iscol_s) { 5026 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 5027 is_perm_c = is_perm_r; 5028 } else { 5029 PetscInt *idxs_c,i; 5030 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 5031 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 5032 for (i=0;i<csize;i++) { 5033 idxs_c[idxs_perm_c[i]] = i; 5034 } 5035 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 5036 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 5037 } 5038 } else { 5039 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 5040 } 5041 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 5042 5043 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 5044 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 5045 work_mat[0] = new_mat; 5046 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 5047 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 5048 } 5049 5050 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 5051 *B = work_mat[0]; 5052 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 5053 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 5054 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 5055 PetscFunctionReturn(0); 5056 } 5057 5058 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5059 { 5060 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5061 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5062 Mat new_mat,lA; 5063 IS is_local,is_global; 5064 PetscInt local_size; 5065 PetscBool isseqaij; 5066 PetscErrorCode ierr; 5067 5068 PetscFunctionBegin; 5069 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5070 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 5071 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 5072 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 5073 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 5074 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 5075 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5076 5077 if (pcbddc->dbg_flag) { 5078 Vec x,x_change; 5079 PetscReal error; 5080 5081 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 5082 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5083 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 5084 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5085 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5086 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 5087 if (!pcbddc->change_interior) { 5088 const PetscScalar *x,*y,*v; 5089 PetscReal lerror = 0.; 5090 PetscInt i; 5091 5092 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 5093 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 5094 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 5095 for (i=0;i<local_size;i++) 5096 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 5097 lerror = PetscAbsScalar(x[i]-y[i]); 5098 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 5099 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 5100 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 5101 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5102 if (error > PETSC_SMALL) { 5103 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5104 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error); 5105 } else { 5106 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error); 5107 } 5108 } 5109 } 5110 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5111 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5112 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5113 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5114 if (error > PETSC_SMALL) { 5115 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5116 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 5117 } else { 5118 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error); 5119 } 5120 } 5121 ierr = VecDestroy(&x);CHKERRQ(ierr); 5122 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5123 } 5124 5125 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5126 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 5127 5128 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5129 ierr = PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5130 if (isseqaij) { 5131 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5132 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 5133 if (lA) { 5134 Mat work; 5135 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 5136 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 5137 ierr = MatDestroy(&work);CHKERRQ(ierr); 5138 } 5139 } else { 5140 Mat work_mat; 5141 5142 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5143 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5144 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 5145 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 5146 if (lA) { 5147 Mat work; 5148 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5149 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 5150 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 5151 ierr = MatDestroy(&work);CHKERRQ(ierr); 5152 } 5153 } 5154 if (matis->A->symmetric_set) { 5155 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 5156 #if !defined(PETSC_USE_COMPLEX) 5157 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 5158 #endif 5159 } 5160 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 5161 PetscFunctionReturn(0); 5162 } 5163 5164 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5165 { 5166 PC_IS* pcis = (PC_IS*)(pc->data); 5167 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5168 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5169 PetscInt *idx_R_local=NULL; 5170 PetscInt n_vertices,i,j,n_R,n_D,n_B; 5171 PetscInt vbs,bs; 5172 PetscBT bitmask=NULL; 5173 PetscErrorCode ierr; 5174 5175 PetscFunctionBegin; 5176 /* 5177 No need to setup local scatters if 5178 - primal space is unchanged 5179 AND 5180 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5181 AND 5182 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5183 */ 5184 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5185 PetscFunctionReturn(0); 5186 } 5187 /* destroy old objects */ 5188 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 5189 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 5190 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 5191 /* Set Non-overlapping dimensions */ 5192 n_B = pcis->n_B; 5193 n_D = pcis->n - n_B; 5194 n_vertices = pcbddc->n_vertices; 5195 5196 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5197 5198 /* create auxiliary bitmask and allocate workspace */ 5199 if (!sub_schurs || !sub_schurs->reuse_solver) { 5200 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 5201 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 5202 for (i=0;i<n_vertices;i++) { 5203 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 5204 } 5205 5206 for (i=0, n_R=0; i<pcis->n; i++) { 5207 if (!PetscBTLookup(bitmask,i)) { 5208 idx_R_local[n_R++] = i; 5209 } 5210 } 5211 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5212 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5213 5214 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5215 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 5216 } 5217 5218 /* Block code */ 5219 vbs = 1; 5220 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 5221 if (bs>1 && !(n_vertices%bs)) { 5222 PetscBool is_blocked = PETSC_TRUE; 5223 PetscInt *vary; 5224 if (!sub_schurs || !sub_schurs->reuse_solver) { 5225 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 5226 ierr = PetscArrayzero(vary,pcis->n/bs);CHKERRQ(ierr); 5227 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5228 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5229 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5230 for (i=0; i<pcis->n/bs; i++) { 5231 if (vary[i]!=0 && vary[i]!=bs) { 5232 is_blocked = PETSC_FALSE; 5233 break; 5234 } 5235 } 5236 ierr = PetscFree(vary);CHKERRQ(ierr); 5237 } else { 5238 /* Verify directly the R set */ 5239 for (i=0; i<n_R/bs; i++) { 5240 PetscInt j,node=idx_R_local[bs*i]; 5241 for (j=1; j<bs; j++) { 5242 if (node != idx_R_local[bs*i+j]-j) { 5243 is_blocked = PETSC_FALSE; 5244 break; 5245 } 5246 } 5247 } 5248 } 5249 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5250 vbs = bs; 5251 for (i=0;i<n_R/vbs;i++) { 5252 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5253 } 5254 } 5255 } 5256 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 5257 if (sub_schurs && sub_schurs->reuse_solver) { 5258 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5259 5260 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5261 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 5262 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 5263 reuse_solver->is_R = pcbddc->is_R_local; 5264 } else { 5265 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 5266 } 5267 5268 /* print some info if requested */ 5269 if (pcbddc->dbg_flag) { 5270 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5271 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5272 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5273 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 5274 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 5275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 5276 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5277 } 5278 5279 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5280 if (!sub_schurs || !sub_schurs->reuse_solver) { 5281 IS is_aux1,is_aux2; 5282 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5283 5284 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5285 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 5286 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 5287 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5288 for (i=0; i<n_D; i++) { 5289 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 5290 } 5291 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5292 for (i=0, j=0; i<n_R; i++) { 5293 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5294 aux_array1[j++] = i; 5295 } 5296 } 5297 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5298 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5299 for (i=0, j=0; i<n_B; i++) { 5300 if (!PetscBTLookup(bitmask,is_indices[i])) { 5301 aux_array2[j++] = i; 5302 } 5303 } 5304 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5305 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 5306 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 5307 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5308 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 5309 5310 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5311 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 5312 for (i=0, j=0; i<n_R; i++) { 5313 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5314 aux_array1[j++] = i; 5315 } 5316 } 5317 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5318 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5319 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5320 } 5321 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 5322 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5323 } else { 5324 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5325 IS tis; 5326 PetscInt schur_size; 5327 5328 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 5329 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 5330 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 5331 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5332 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5333 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 5334 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5335 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5336 } 5337 } 5338 PetscFunctionReturn(0); 5339 } 5340 5341 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5342 { 5343 MatNullSpace NullSpace; 5344 Mat dmat; 5345 const Vec *nullvecs; 5346 Vec v,v2,*nullvecs2; 5347 VecScatter sct = NULL; 5348 PetscContainer c; 5349 PetscScalar *ddata; 5350 PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs; 5351 PetscBool nnsp_has_cnst; 5352 PetscErrorCode ierr; 5353 5354 PetscFunctionBegin; 5355 if (!is && !B) { /* MATIS */ 5356 Mat_IS* matis = (Mat_IS*)A->data; 5357 5358 if (!B) { 5359 ierr = MatISGetLocalMat(A,&B);CHKERRQ(ierr); 5360 } 5361 sct = matis->cctx; 5362 ierr = PetscObjectReference((PetscObject)sct);CHKERRQ(ierr); 5363 } else { 5364 ierr = MatGetNullSpace(B,&NullSpace);CHKERRQ(ierr); 5365 if (!NullSpace) { 5366 ierr = MatGetNearNullSpace(B,&NullSpace);CHKERRQ(ierr); 5367 } 5368 if (NullSpace) PetscFunctionReturn(0); 5369 } 5370 ierr = MatGetNullSpace(A,&NullSpace);CHKERRQ(ierr); 5371 if (!NullSpace) { 5372 ierr = MatGetNearNullSpace(A,&NullSpace);CHKERRQ(ierr); 5373 } 5374 if (!NullSpace) PetscFunctionReturn(0); 5375 5376 ierr = MatCreateVecs(A,&v,NULL);CHKERRQ(ierr); 5377 ierr = MatCreateVecs(B,&v2,NULL);CHKERRQ(ierr); 5378 if (!sct) { 5379 ierr = VecScatterCreate(v,is,v2,NULL,&sct);CHKERRQ(ierr); 5380 } 5381 ierr = MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs);CHKERRQ(ierr); 5382 bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst; 5383 ierr = PetscMalloc1(bsiz,&nullvecs2);CHKERRQ(ierr); 5384 ierr = VecGetBlockSize(v2,&bs);CHKERRQ(ierr); 5385 ierr = VecGetSize(v2,&N);CHKERRQ(ierr); 5386 ierr = VecGetLocalSize(v2,&n);CHKERRQ(ierr); 5387 ierr = PetscMalloc1(n*bsiz,&ddata);CHKERRQ(ierr); 5388 for (k=0;k<nnsp_size;k++) { 5389 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k]);CHKERRQ(ierr); 5390 ierr = VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5391 ierr = VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5392 } 5393 if (nnsp_has_cnst) { 5394 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size]);CHKERRQ(ierr); 5395 ierr = VecSet(nullvecs2[nnsp_size],1.0);CHKERRQ(ierr); 5396 } 5397 ierr = PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2);CHKERRQ(ierr); 5398 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace);CHKERRQ(ierr); 5399 5400 ierr = MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat);CHKERRQ(ierr); 5401 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)B),&c);CHKERRQ(ierr); 5402 ierr = PetscContainerSetPointer(c,ddata);CHKERRQ(ierr); 5403 ierr = PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault);CHKERRQ(ierr); 5404 ierr = PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c);CHKERRQ(ierr); 5405 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 5406 ierr = PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat);CHKERRQ(ierr); 5407 ierr = MatDestroy(&dmat);CHKERRQ(ierr); 5408 5409 for (k=0;k<bsiz;k++) { 5410 ierr = VecDestroy(&nullvecs2[k]);CHKERRQ(ierr); 5411 } 5412 ierr = PetscFree(nullvecs2);CHKERRQ(ierr); 5413 ierr = MatSetNearNullSpace(B,NullSpace);CHKERRQ(ierr); 5414 ierr = MatNullSpaceDestroy(&NullSpace);CHKERRQ(ierr); 5415 ierr = VecDestroy(&v);CHKERRQ(ierr); 5416 ierr = VecDestroy(&v2);CHKERRQ(ierr); 5417 ierr = VecScatterDestroy(&sct);CHKERRQ(ierr); 5418 PetscFunctionReturn(0); 5419 } 5420 5421 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5422 { 5423 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5424 PC_IS *pcis = (PC_IS*)pc->data; 5425 PC pc_temp; 5426 Mat A_RR; 5427 MatNullSpace nnsp; 5428 MatReuse reuse; 5429 PetscScalar m_one = -1.0; 5430 PetscReal value; 5431 PetscInt n_D,n_R; 5432 PetscBool issbaij,opts; 5433 PetscErrorCode ierr; 5434 void (*f)(void) = NULL; 5435 char dir_prefix[256],neu_prefix[256],str_level[16]; 5436 size_t len; 5437 5438 PetscFunctionBegin; 5439 ierr = PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5440 /* approximate solver, propagate NearNullSpace if needed */ 5441 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5442 MatNullSpace gnnsp1,gnnsp2; 5443 PetscBool lhas,ghas; 5444 5445 ierr = MatGetNearNullSpace(pcbddc->local_mat,&nnsp);CHKERRQ(ierr); 5446 ierr = MatGetNearNullSpace(pc->pmat,&gnnsp1);CHKERRQ(ierr); 5447 ierr = MatGetNullSpace(pc->pmat,&gnnsp2);CHKERRQ(ierr); 5448 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5449 ierr = MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5450 if (!ghas && (gnnsp1 || gnnsp2)) { 5451 ierr = MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL);CHKERRQ(ierr); 5452 } 5453 } 5454 5455 /* compute prefixes */ 5456 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 5457 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 5458 if (!pcbddc->current_level) { 5459 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));CHKERRQ(ierr); 5460 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));CHKERRQ(ierr); 5461 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5462 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5463 } else { 5464 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 5465 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5466 len -= 15; /* remove "pc_bddc_coarse_" */ 5467 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5468 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5469 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5470 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5471 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5472 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5473 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5474 ierr = PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));CHKERRQ(ierr); 5475 ierr = PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));CHKERRQ(ierr); 5476 } 5477 5478 /* DIRICHLET PROBLEM */ 5479 if (dirichlet) { 5480 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5481 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5482 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 5483 if (pcbddc->dbg_flag) { 5484 Mat A_IIn; 5485 5486 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5487 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5488 pcis->A_II = A_IIn; 5489 } 5490 } 5491 if (pcbddc->local_mat->symmetric_set) { 5492 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);CHKERRQ(ierr); 5493 } 5494 /* Matrix for Dirichlet problem is pcis->A_II */ 5495 n_D = pcis->n - pcis->n_B; 5496 opts = PETSC_FALSE; 5497 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5498 opts = PETSC_TRUE; 5499 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5500 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5501 /* default */ 5502 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5503 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5504 ierr = PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5505 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5506 if (issbaij) { 5507 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5508 } else { 5509 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5510 } 5511 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);CHKERRQ(ierr); 5512 } 5513 ierr = MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix);CHKERRQ(ierr); 5514 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II);CHKERRQ(ierr); 5515 /* Allow user's customization */ 5516 if (opts) { 5517 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5518 } 5519 ierr = MatGetNearNullSpace(pcis->pA_II,&nnsp);CHKERRQ(ierr); 5520 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5521 ierr = MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II);CHKERRQ(ierr); 5522 } 5523 ierr = MatGetNearNullSpace(pcis->pA_II,&nnsp);CHKERRQ(ierr); 5524 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5525 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5526 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5527 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5528 const PetscInt *idxs; 5529 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5530 5531 ierr = ISGetLocalSize(pcis->is_I_local,&nl);CHKERRQ(ierr); 5532 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5533 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5534 for (i=0;i<nl;i++) { 5535 for (d=0;d<cdim;d++) { 5536 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5537 } 5538 } 5539 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5540 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5541 ierr = PetscFree(scoords);CHKERRQ(ierr); 5542 } 5543 if (sub_schurs && sub_schurs->reuse_solver) { 5544 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5545 5546 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5547 } 5548 5549 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5550 if (!n_D) { 5551 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5552 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5553 } 5554 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 5555 /* set ksp_D into pcis data */ 5556 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5557 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5558 pcis->ksp_D = pcbddc->ksp_D; 5559 } 5560 5561 /* NEUMANN PROBLEM */ 5562 A_RR = NULL; 5563 if (neumann) { 5564 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5565 PetscInt ibs,mbs; 5566 PetscBool issbaij, reuse_neumann_solver; 5567 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5568 5569 reuse_neumann_solver = PETSC_FALSE; 5570 if (sub_schurs && sub_schurs->reuse_solver) { 5571 IS iP; 5572 5573 reuse_neumann_solver = PETSC_TRUE; 5574 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5575 if (iP) reuse_neumann_solver = PETSC_FALSE; 5576 } 5577 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5578 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5579 if (pcbddc->ksp_R) { /* already created ksp */ 5580 PetscInt nn_R; 5581 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5582 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5583 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5584 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5585 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5586 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5587 reuse = MAT_INITIAL_MATRIX; 5588 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5589 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5590 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5591 reuse = MAT_INITIAL_MATRIX; 5592 } else { /* safe to reuse the matrix */ 5593 reuse = MAT_REUSE_MATRIX; 5594 } 5595 } 5596 /* last check */ 5597 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5598 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5599 reuse = MAT_INITIAL_MATRIX; 5600 } 5601 } else { /* first time, so we need to create the matrix */ 5602 reuse = MAT_INITIAL_MATRIX; 5603 } 5604 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5605 TODO: Get Rid of these conversions */ 5606 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5607 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5608 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5609 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5610 if (matis->A == pcbddc->local_mat) { 5611 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5612 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5613 } else { 5614 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5615 } 5616 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5617 if (matis->A == pcbddc->local_mat) { 5618 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5619 ierr = MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5620 } else { 5621 ierr = MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5622 } 5623 } 5624 /* extract A_RR */ 5625 if (reuse_neumann_solver) { 5626 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5627 5628 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5629 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5630 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5631 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5632 } else { 5633 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5634 } 5635 } else { 5636 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5637 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5638 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5639 } 5640 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5641 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5642 } 5643 if (pcbddc->local_mat->symmetric_set) { 5644 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);CHKERRQ(ierr); 5645 } 5646 opts = PETSC_FALSE; 5647 if (!pcbddc->ksp_R) { /* create object if not present */ 5648 opts = PETSC_TRUE; 5649 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5650 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5651 /* default */ 5652 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5653 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5654 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5655 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5656 if (issbaij) { 5657 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5658 } else { 5659 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5660 } 5661 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);CHKERRQ(ierr); 5662 } 5663 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5664 ierr = MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);CHKERRQ(ierr); 5665 if (opts) { /* Allow user's customization once */ 5666 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5667 } 5668 ierr = MatGetNearNullSpace(A_RR,&nnsp);CHKERRQ(ierr); 5669 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5670 ierr = MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR);CHKERRQ(ierr); 5671 } 5672 ierr = MatGetNearNullSpace(A_RR,&nnsp);CHKERRQ(ierr); 5673 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5674 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5675 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5676 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5677 const PetscInt *idxs; 5678 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5679 5680 ierr = ISGetLocalSize(pcbddc->is_R_local,&nl);CHKERRQ(ierr); 5681 ierr = ISGetIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5682 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5683 for (i=0;i<nl;i++) { 5684 for (d=0;d<cdim;d++) { 5685 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5686 } 5687 } 5688 ierr = ISRestoreIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5689 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5690 ierr = PetscFree(scoords);CHKERRQ(ierr); 5691 } 5692 5693 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5694 if (!n_R) { 5695 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5696 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5697 } 5698 /* Reuse solver if it is present */ 5699 if (reuse_neumann_solver) { 5700 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5701 5702 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5703 } 5704 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 5705 } 5706 5707 if (pcbddc->dbg_flag) { 5708 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5709 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5710 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5711 } 5712 ierr = PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5713 5714 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5715 if (pcbddc->NullSpace_corr[0]) { 5716 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5717 } 5718 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5719 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5720 } 5721 if (neumann && pcbddc->NullSpace_corr[2]) { 5722 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5723 } 5724 /* check Dirichlet and Neumann solvers */ 5725 if (pcbddc->dbg_flag) { 5726 if (dirichlet) { /* Dirichlet */ 5727 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5728 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5729 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5730 ierr = KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);CHKERRQ(ierr); 5731 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5732 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5733 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5734 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5735 } 5736 if (neumann) { /* Neumann */ 5737 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5738 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5739 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5740 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 5741 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5742 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5743 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5744 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5745 } 5746 } 5747 /* free Neumann problem's matrix */ 5748 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5749 PetscFunctionReturn(0); 5750 } 5751 5752 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5753 { 5754 PetscErrorCode ierr; 5755 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5756 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5757 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5758 5759 PetscFunctionBegin; 5760 if (!reuse_solver) { 5761 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5762 } 5763 if (!pcbddc->switch_static) { 5764 if (applytranspose && pcbddc->local_auxmat1) { 5765 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5766 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5767 } 5768 if (!reuse_solver) { 5769 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5770 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5771 } else { 5772 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5773 5774 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5775 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5776 } 5777 } else { 5778 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5779 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5780 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5781 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5782 if (applytranspose && pcbddc->local_auxmat1) { 5783 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5784 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5785 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5786 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5787 } 5788 } 5789 if (!reuse_solver || pcbddc->switch_static) { 5790 if (applytranspose) { 5791 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5792 } else { 5793 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5794 } 5795 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R);CHKERRQ(ierr); 5796 } else { 5797 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5798 5799 if (applytranspose) { 5800 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5801 } else { 5802 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5803 } 5804 } 5805 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5806 if (!pcbddc->switch_static) { 5807 if (!reuse_solver) { 5808 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5809 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5810 } else { 5811 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5812 5813 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5814 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5815 } 5816 if (!applytranspose && pcbddc->local_auxmat1) { 5817 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5818 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5819 } 5820 } else { 5821 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5822 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5823 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5824 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5825 if (!applytranspose && pcbddc->local_auxmat1) { 5826 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5827 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5828 } 5829 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5830 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5831 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5832 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5833 } 5834 PetscFunctionReturn(0); 5835 } 5836 5837 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5838 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5839 { 5840 PetscErrorCode ierr; 5841 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5842 PC_IS* pcis = (PC_IS*) (pc->data); 5843 const PetscScalar zero = 0.0; 5844 5845 PetscFunctionBegin; 5846 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5847 if (!pcbddc->benign_apply_coarse_only) { 5848 if (applytranspose) { 5849 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5850 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5851 } else { 5852 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5853 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5854 } 5855 } else { 5856 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5857 } 5858 5859 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5860 if (pcbddc->benign_n) { 5861 PetscScalar *array; 5862 PetscInt j; 5863 5864 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5865 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5866 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5867 } 5868 5869 /* start communications from local primal nodes to rhs of coarse solver */ 5870 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5871 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5872 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5873 5874 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5875 if (pcbddc->coarse_ksp) { 5876 Mat coarse_mat; 5877 Vec rhs,sol; 5878 MatNullSpace nullsp; 5879 PetscBool isbddc = PETSC_FALSE; 5880 5881 if (pcbddc->benign_have_null) { 5882 PC coarse_pc; 5883 5884 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5885 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5886 /* we need to propagate to coarser levels the need for a possible benign correction */ 5887 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5888 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5889 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5890 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5891 } 5892 } 5893 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5894 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5895 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5896 if (applytranspose) { 5897 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5898 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5899 ierr = KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);CHKERRQ(ierr); 5900 ierr = MatGetTransposeNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5901 if (nullsp) { 5902 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5903 } 5904 } else { 5905 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5906 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5907 PC coarse_pc; 5908 5909 if (nullsp) { 5910 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5911 } 5912 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5913 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5914 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5915 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5916 } else { 5917 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5918 ierr = KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);CHKERRQ(ierr); 5919 if (nullsp) { 5920 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5921 } 5922 } 5923 } 5924 /* we don't need the benign correction at coarser levels anymore */ 5925 if (pcbddc->benign_have_null && isbddc) { 5926 PC coarse_pc; 5927 PC_BDDC* coarsepcbddc; 5928 5929 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5930 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5931 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5932 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5933 } 5934 } 5935 5936 /* Local solution on R nodes */ 5937 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5938 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5939 } 5940 /* communications from coarse sol to local primal nodes */ 5941 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5942 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5943 5944 /* Sum contributions from the two levels */ 5945 if (!pcbddc->benign_apply_coarse_only) { 5946 if (applytranspose) { 5947 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5948 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5949 } else { 5950 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5951 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5952 } 5953 /* store p0 */ 5954 if (pcbddc->benign_n) { 5955 PetscScalar *array; 5956 PetscInt j; 5957 5958 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5959 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5960 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5961 } 5962 } else { /* expand the coarse solution */ 5963 if (applytranspose) { 5964 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5965 } else { 5966 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5967 } 5968 } 5969 PetscFunctionReturn(0); 5970 } 5971 5972 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5973 { 5974 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5975 Vec from,to; 5976 const PetscScalar *array; 5977 PetscErrorCode ierr; 5978 5979 PetscFunctionBegin; 5980 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5981 from = pcbddc->coarse_vec; 5982 to = pcbddc->vec1_P; 5983 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5984 Vec tvec; 5985 5986 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5987 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5988 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5989 ierr = VecGetArrayRead(tvec,&array);CHKERRQ(ierr); 5990 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5991 ierr = VecRestoreArrayRead(tvec,&array);CHKERRQ(ierr); 5992 } 5993 } else { /* from local to global -> put data in coarse right hand side */ 5994 from = pcbddc->vec1_P; 5995 to = pcbddc->coarse_vec; 5996 } 5997 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5998 PetscFunctionReturn(0); 5999 } 6000 6001 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 6002 { 6003 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 6004 Vec from,to; 6005 const PetscScalar *array; 6006 PetscErrorCode ierr; 6007 6008 PetscFunctionBegin; 6009 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6010 from = pcbddc->coarse_vec; 6011 to = pcbddc->vec1_P; 6012 } else { /* from local to global -> put data in coarse right hand side */ 6013 from = pcbddc->vec1_P; 6014 to = pcbddc->coarse_vec; 6015 } 6016 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 6017 if (smode == SCATTER_FORWARD) { 6018 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6019 Vec tvec; 6020 6021 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 6022 ierr = VecGetArrayRead(to,&array);CHKERRQ(ierr); 6023 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 6024 ierr = VecRestoreArrayRead(to,&array);CHKERRQ(ierr); 6025 } 6026 } else { 6027 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6028 ierr = VecResetArray(from);CHKERRQ(ierr); 6029 } 6030 } 6031 PetscFunctionReturn(0); 6032 } 6033 6034 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6035 { 6036 PetscErrorCode ierr; 6037 PC_IS* pcis = (PC_IS*)(pc->data); 6038 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6039 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6040 /* one and zero */ 6041 PetscScalar one=1.0,zero=0.0; 6042 /* space to store constraints and their local indices */ 6043 PetscScalar *constraints_data; 6044 PetscInt *constraints_idxs,*constraints_idxs_B; 6045 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 6046 PetscInt *constraints_n; 6047 /* iterators */ 6048 PetscInt i,j,k,total_counts,total_counts_cc,cum; 6049 /* BLAS integers */ 6050 PetscBLASInt lwork,lierr; 6051 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 6052 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 6053 /* reuse */ 6054 PetscInt olocal_primal_size,olocal_primal_size_cc; 6055 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 6056 /* change of basis */ 6057 PetscBool qr_needed; 6058 PetscBT change_basis,qr_needed_idx; 6059 /* auxiliary stuff */ 6060 PetscInt *nnz,*is_indices; 6061 PetscInt ncc; 6062 /* some quantities */ 6063 PetscInt n_vertices,total_primal_vertices,valid_constraints; 6064 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 6065 PetscReal tol; /* tolerance for retaining eigenmodes */ 6066 6067 PetscFunctionBegin; 6068 tol = PetscSqrtReal(PETSC_SMALL); 6069 /* Destroy Mat objects computed previously */ 6070 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6071 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6072 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 6073 /* save info on constraints from previous setup (if any) */ 6074 olocal_primal_size = pcbddc->local_primal_size; 6075 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6076 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 6077 ierr = PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc);CHKERRQ(ierr); 6078 ierr = PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc);CHKERRQ(ierr); 6079 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6080 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6081 6082 if (!pcbddc->adaptive_selection) { 6083 IS ISForVertices,*ISForFaces,*ISForEdges; 6084 MatNullSpace nearnullsp; 6085 const Vec *nearnullvecs; 6086 Vec *localnearnullsp; 6087 PetscScalar *array; 6088 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 6089 PetscBool nnsp_has_cnst; 6090 /* LAPACK working arrays for SVD or POD */ 6091 PetscBool skip_lapack,boolforchange; 6092 PetscScalar *work; 6093 PetscReal *singular_vals; 6094 #if defined(PETSC_USE_COMPLEX) 6095 PetscReal *rwork; 6096 #endif 6097 PetscScalar *temp_basis = NULL,*correlation_mat = NULL; 6098 PetscBLASInt dummy_int=1; 6099 PetscScalar dummy_scalar=1.; 6100 PetscBool use_pod = PETSC_FALSE; 6101 6102 /* MKL SVD with same input gives different results on different processes! */ 6103 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL) 6104 use_pod = PETSC_TRUE; 6105 #endif 6106 /* Get index sets for faces, edges and vertices from graph */ 6107 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 6108 /* print some info */ 6109 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6110 PetscInt nv; 6111 6112 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6113 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 6114 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6115 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6116 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6117 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 6118 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 6119 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6120 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6121 } 6122 6123 /* free unneeded index sets */ 6124 if (!pcbddc->use_vertices) { 6125 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6126 } 6127 if (!pcbddc->use_edges) { 6128 for (i=0;i<n_ISForEdges;i++) { 6129 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6130 } 6131 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6132 n_ISForEdges = 0; 6133 } 6134 if (!pcbddc->use_faces) { 6135 for (i=0;i<n_ISForFaces;i++) { 6136 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6137 } 6138 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6139 n_ISForFaces = 0; 6140 } 6141 6142 /* check if near null space is attached to global mat */ 6143 if (pcbddc->use_nnsp) { 6144 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 6145 } else nearnullsp = NULL; 6146 6147 if (nearnullsp) { 6148 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 6149 /* remove any stored info */ 6150 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 6151 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 6152 /* store information for BDDC solver reuse */ 6153 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 6154 pcbddc->onearnullspace = nearnullsp; 6155 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 6156 for (i=0;i<nnsp_size;i++) { 6157 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 6158 } 6159 } else { /* if near null space is not provided BDDC uses constants by default */ 6160 nnsp_size = 0; 6161 nnsp_has_cnst = PETSC_TRUE; 6162 } 6163 /* get max number of constraints on a single cc */ 6164 max_constraints = nnsp_size; 6165 if (nnsp_has_cnst) max_constraints++; 6166 6167 /* 6168 Evaluate maximum storage size needed by the procedure 6169 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6170 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6171 There can be multiple constraints per connected component 6172 */ 6173 n_vertices = 0; 6174 if (ISForVertices) { 6175 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 6176 } 6177 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 6178 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 6179 6180 total_counts = n_ISForFaces+n_ISForEdges; 6181 total_counts *= max_constraints; 6182 total_counts += n_vertices; 6183 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 6184 6185 total_counts = 0; 6186 max_size_of_constraint = 0; 6187 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 6188 IS used_is; 6189 if (i<n_ISForEdges) { 6190 used_is = ISForEdges[i]; 6191 } else { 6192 used_is = ISForFaces[i-n_ISForEdges]; 6193 } 6194 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 6195 total_counts += j; 6196 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 6197 } 6198 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 6199 6200 /* get local part of global near null space vectors */ 6201 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 6202 for (k=0;k<nnsp_size;k++) { 6203 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 6204 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6205 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6206 } 6207 6208 /* whether or not to skip lapack calls */ 6209 skip_lapack = PETSC_TRUE; 6210 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6211 6212 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6213 if (!skip_lapack) { 6214 PetscScalar temp_work; 6215 6216 if (use_pod) { 6217 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6218 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 6219 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 6220 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 6221 #if defined(PETSC_USE_COMPLEX) 6222 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 6223 #endif 6224 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6225 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6226 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 6227 lwork = -1; 6228 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6229 #if !defined(PETSC_USE_COMPLEX) 6230 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 6231 #else 6232 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 6233 #endif 6234 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6235 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 6236 } else { 6237 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6238 /* SVD */ 6239 PetscInt max_n,min_n; 6240 max_n = max_size_of_constraint; 6241 min_n = max_constraints; 6242 if (max_size_of_constraint < max_constraints) { 6243 min_n = max_size_of_constraint; 6244 max_n = max_constraints; 6245 } 6246 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 6247 #if defined(PETSC_USE_COMPLEX) 6248 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 6249 #endif 6250 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6251 lwork = -1; 6252 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 6253 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 6254 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 6255 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6256 #if !defined(PETSC_USE_COMPLEX) 6257 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 6258 #else 6259 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 6260 #endif 6261 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6262 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 6263 #else 6264 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6265 #endif /* on missing GESVD */ 6266 } 6267 /* Allocate optimal workspace */ 6268 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 6269 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 6270 } 6271 /* Now we can loop on constraining sets */ 6272 total_counts = 0; 6273 constraints_idxs_ptr[0] = 0; 6274 constraints_data_ptr[0] = 0; 6275 /* vertices */ 6276 if (n_vertices) { 6277 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6278 ierr = PetscArraycpy(constraints_idxs,is_indices,n_vertices);CHKERRQ(ierr); 6279 for (i=0;i<n_vertices;i++) { 6280 constraints_n[total_counts] = 1; 6281 constraints_data[total_counts] = 1.0; 6282 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 6283 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 6284 total_counts++; 6285 } 6286 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6287 n_vertices = total_counts; 6288 } 6289 6290 /* edges and faces */ 6291 total_counts_cc = total_counts; 6292 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6293 IS used_is; 6294 PetscBool idxs_copied = PETSC_FALSE; 6295 6296 if (ncc<n_ISForEdges) { 6297 used_is = ISForEdges[ncc]; 6298 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6299 } else { 6300 used_is = ISForFaces[ncc-n_ISForEdges]; 6301 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6302 } 6303 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6304 6305 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 6306 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6307 /* change of basis should not be performed on local periodic nodes */ 6308 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6309 if (nnsp_has_cnst) { 6310 PetscScalar quad_value; 6311 6312 ierr = PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);CHKERRQ(ierr); 6313 idxs_copied = PETSC_TRUE; 6314 6315 if (!pcbddc->use_nnsp_true) { 6316 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6317 } else { 6318 quad_value = 1.0; 6319 } 6320 for (j=0;j<size_of_constraint;j++) { 6321 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6322 } 6323 temp_constraints++; 6324 total_counts++; 6325 } 6326 for (k=0;k<nnsp_size;k++) { 6327 PetscReal real_value; 6328 PetscScalar *ptr_to_data; 6329 6330 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6331 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6332 for (j=0;j<size_of_constraint;j++) { 6333 ptr_to_data[j] = array[is_indices[j]]; 6334 } 6335 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6336 /* check if array is null on the connected component */ 6337 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6338 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6339 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6340 temp_constraints++; 6341 total_counts++; 6342 if (!idxs_copied) { 6343 ierr = PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);CHKERRQ(ierr); 6344 idxs_copied = PETSC_TRUE; 6345 } 6346 } 6347 } 6348 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6349 valid_constraints = temp_constraints; 6350 if (!pcbddc->use_nnsp_true && temp_constraints) { 6351 if (temp_constraints == 1) { /* just normalize the constraint */ 6352 PetscScalar norm,*ptr_to_data; 6353 6354 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6355 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6356 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6357 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6358 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6359 } else { /* perform SVD */ 6360 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6361 6362 if (use_pod) { 6363 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6364 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6365 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6366 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6367 from that computed using LAPACKgesvd 6368 -> This is due to a different computation of eigenvectors in LAPACKheev 6369 -> The quality of the POD-computed basis will be the same */ 6370 ierr = PetscArrayzero(correlation_mat,temp_constraints*temp_constraints);CHKERRQ(ierr); 6371 /* Store upper triangular part of correlation matrix */ 6372 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6373 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6374 for (j=0;j<temp_constraints;j++) { 6375 for (k=0;k<j+1;k++) { 6376 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6377 } 6378 } 6379 /* compute eigenvalues and eigenvectors of correlation matrix */ 6380 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6381 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 6382 #if !defined(PETSC_USE_COMPLEX) 6383 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6384 #else 6385 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6386 #endif 6387 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6388 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6389 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6390 j = 0; 6391 while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++; 6392 total_counts = total_counts-j; 6393 valid_constraints = temp_constraints-j; 6394 /* scale and copy POD basis into used quadrature memory */ 6395 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6396 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6397 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 6398 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6399 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 6400 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6401 if (j<temp_constraints) { 6402 PetscInt ii; 6403 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6404 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6405 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6406 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6407 for (k=0;k<temp_constraints-j;k++) { 6408 for (ii=0;ii<size_of_constraint;ii++) { 6409 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6410 } 6411 } 6412 } 6413 } else { 6414 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6415 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6416 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6417 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6418 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6419 #if !defined(PETSC_USE_COMPLEX) 6420 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6421 #else 6422 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6423 #endif 6424 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6425 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6426 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6427 k = temp_constraints; 6428 if (k > size_of_constraint) k = size_of_constraint; 6429 j = 0; 6430 while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++; 6431 valid_constraints = k-j; 6432 total_counts = total_counts-temp_constraints+valid_constraints; 6433 #else 6434 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6435 #endif /* on missing GESVD */ 6436 } 6437 } 6438 } 6439 /* update pointers information */ 6440 if (valid_constraints) { 6441 constraints_n[total_counts_cc] = valid_constraints; 6442 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6443 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6444 /* set change_of_basis flag */ 6445 if (boolforchange) { 6446 PetscBTSet(change_basis,total_counts_cc); 6447 } 6448 total_counts_cc++; 6449 } 6450 } 6451 /* free workspace */ 6452 if (!skip_lapack) { 6453 ierr = PetscFree(work);CHKERRQ(ierr); 6454 #if defined(PETSC_USE_COMPLEX) 6455 ierr = PetscFree(rwork);CHKERRQ(ierr); 6456 #endif 6457 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 6458 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 6459 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 6460 } 6461 for (k=0;k<nnsp_size;k++) { 6462 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 6463 } 6464 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 6465 /* free index sets of faces, edges and vertices */ 6466 for (i=0;i<n_ISForFaces;i++) { 6467 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6468 } 6469 if (n_ISForFaces) { 6470 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6471 } 6472 for (i=0;i<n_ISForEdges;i++) { 6473 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6474 } 6475 if (n_ISForEdges) { 6476 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6477 } 6478 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6479 } else { 6480 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6481 6482 total_counts = 0; 6483 n_vertices = 0; 6484 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6485 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6486 } 6487 max_constraints = 0; 6488 total_counts_cc = 0; 6489 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6490 total_counts += pcbddc->adaptive_constraints_n[i]; 6491 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6492 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6493 } 6494 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6495 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6496 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6497 constraints_data = pcbddc->adaptive_constraints_data; 6498 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6499 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 6500 total_counts_cc = 0; 6501 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6502 if (pcbddc->adaptive_constraints_n[i]) { 6503 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6504 } 6505 } 6506 6507 max_size_of_constraint = 0; 6508 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6509 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 6510 /* Change of basis */ 6511 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 6512 if (pcbddc->use_change_of_basis) { 6513 for (i=0;i<sub_schurs->n_subs;i++) { 6514 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6515 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 6516 } 6517 } 6518 } 6519 } 6520 pcbddc->local_primal_size = total_counts; 6521 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6522 6523 /* map constraints_idxs in boundary numbering */ 6524 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6525 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i); 6526 6527 /* Create constraint matrix */ 6528 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6529 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6530 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6531 6532 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6533 /* determine if a QR strategy is needed for change of basis */ 6534 qr_needed = pcbddc->use_qr_single; 6535 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6536 total_primal_vertices=0; 6537 pcbddc->local_primal_size_cc = 0; 6538 for (i=0;i<total_counts_cc;i++) { 6539 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6540 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6541 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6542 pcbddc->local_primal_size_cc += 1; 6543 } else if (PetscBTLookup(change_basis,i)) { 6544 for (k=0;k<constraints_n[i];k++) { 6545 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6546 } 6547 pcbddc->local_primal_size_cc += constraints_n[i]; 6548 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6549 PetscBTSet(qr_needed_idx,i); 6550 qr_needed = PETSC_TRUE; 6551 } 6552 } else { 6553 pcbddc->local_primal_size_cc += 1; 6554 } 6555 } 6556 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6557 pcbddc->n_vertices = total_primal_vertices; 6558 /* permute indices in order to have a sorted set of vertices */ 6559 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6560 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6561 ierr = PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices);CHKERRQ(ierr); 6562 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6563 6564 /* nonzero structure of constraint matrix */ 6565 /* and get reference dof for local constraints */ 6566 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6567 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6568 6569 j = total_primal_vertices; 6570 total_counts = total_primal_vertices; 6571 cum = total_primal_vertices; 6572 for (i=n_vertices;i<total_counts_cc;i++) { 6573 if (!PetscBTLookup(change_basis,i)) { 6574 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6575 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6576 cum++; 6577 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6578 for (k=0;k<constraints_n[i];k++) { 6579 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6580 nnz[j+k] = size_of_constraint; 6581 } 6582 j += constraints_n[i]; 6583 } 6584 } 6585 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6586 ierr = MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6587 ierr = PetscFree(nnz);CHKERRQ(ierr); 6588 6589 /* set values in constraint matrix */ 6590 for (i=0;i<total_primal_vertices;i++) { 6591 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6592 } 6593 total_counts = total_primal_vertices; 6594 for (i=n_vertices;i<total_counts_cc;i++) { 6595 if (!PetscBTLookup(change_basis,i)) { 6596 PetscInt *cols; 6597 6598 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6599 cols = constraints_idxs+constraints_idxs_ptr[i]; 6600 for (k=0;k<constraints_n[i];k++) { 6601 PetscInt row = total_counts+k; 6602 PetscScalar *vals; 6603 6604 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6605 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6606 } 6607 total_counts += constraints_n[i]; 6608 } 6609 } 6610 /* assembling */ 6611 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6612 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6613 ierr = MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view");CHKERRQ(ierr); 6614 6615 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6616 if (pcbddc->use_change_of_basis) { 6617 /* dual and primal dofs on a single cc */ 6618 PetscInt dual_dofs,primal_dofs; 6619 /* working stuff for GEQRF */ 6620 PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t; 6621 PetscBLASInt lqr_work; 6622 /* working stuff for UNGQR */ 6623 PetscScalar *gqr_work = NULL,lgqr_work_t=0.0; 6624 PetscBLASInt lgqr_work; 6625 /* working stuff for TRTRS */ 6626 PetscScalar *trs_rhs = NULL; 6627 PetscBLASInt Blas_NRHS; 6628 /* pointers for values insertion into change of basis matrix */ 6629 PetscInt *start_rows,*start_cols; 6630 PetscScalar *start_vals; 6631 /* working stuff for values insertion */ 6632 PetscBT is_primal; 6633 PetscInt *aux_primal_numbering_B; 6634 /* matrix sizes */ 6635 PetscInt global_size,local_size; 6636 /* temporary change of basis */ 6637 Mat localChangeOfBasisMatrix; 6638 /* extra space for debugging */ 6639 PetscScalar *dbg_work = NULL; 6640 6641 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6642 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6643 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6644 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6645 /* nonzeros for local mat */ 6646 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6647 if (!pcbddc->benign_change || pcbddc->fake_change) { 6648 for (i=0;i<pcis->n;i++) nnz[i]=1; 6649 } else { 6650 const PetscInt *ii; 6651 PetscInt n; 6652 PetscBool flg_row; 6653 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6654 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6655 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6656 } 6657 for (i=n_vertices;i<total_counts_cc;i++) { 6658 if (PetscBTLookup(change_basis,i)) { 6659 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6660 if (PetscBTLookup(qr_needed_idx,i)) { 6661 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6662 } else { 6663 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6664 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6665 } 6666 } 6667 } 6668 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6669 ierr = MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6670 ierr = PetscFree(nnz);CHKERRQ(ierr); 6671 /* Set interior change in the matrix */ 6672 if (!pcbddc->benign_change || pcbddc->fake_change) { 6673 for (i=0;i<pcis->n;i++) { 6674 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6675 } 6676 } else { 6677 const PetscInt *ii,*jj; 6678 PetscScalar *aa; 6679 PetscInt n; 6680 PetscBool flg_row; 6681 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6682 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6683 for (i=0;i<n;i++) { 6684 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6685 } 6686 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6687 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6688 } 6689 6690 if (pcbddc->dbg_flag) { 6691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6693 } 6694 6695 6696 /* Now we loop on the constraints which need a change of basis */ 6697 /* 6698 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6699 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6700 6701 Basic blocks of change of basis matrix T computed by 6702 6703 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6704 6705 | 1 0 ... 0 s_1/S | 6706 | 0 1 ... 0 s_2/S | 6707 | ... | 6708 | 0 ... 1 s_{n-1}/S | 6709 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6710 6711 with S = \sum_{i=1}^n s_i^2 6712 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6713 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6714 6715 - QR decomposition of constraints otherwise 6716 */ 6717 if (qr_needed && max_size_of_constraint) { 6718 /* space to store Q */ 6719 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6720 /* array to store scaling factors for reflectors */ 6721 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6722 /* first we issue queries for optimal work */ 6723 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6724 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6725 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6726 lqr_work = -1; 6727 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6728 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6729 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6730 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6731 lgqr_work = -1; 6732 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6733 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6734 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6735 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6736 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6737 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6738 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6739 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6740 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6741 /* array to store rhs and solution of triangular solver */ 6742 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6743 /* allocating workspace for check */ 6744 if (pcbddc->dbg_flag) { 6745 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6746 } 6747 } 6748 /* array to store whether a node is primal or not */ 6749 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6750 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6751 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6752 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i); 6753 for (i=0;i<total_primal_vertices;i++) { 6754 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6755 } 6756 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6757 6758 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6759 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6760 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6761 if (PetscBTLookup(change_basis,total_counts)) { 6762 /* get constraint info */ 6763 primal_dofs = constraints_n[total_counts]; 6764 dual_dofs = size_of_constraint-primal_dofs; 6765 6766 if (pcbddc->dbg_flag) { 6767 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6768 } 6769 6770 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6771 6772 /* copy quadrature constraints for change of basis check */ 6773 if (pcbddc->dbg_flag) { 6774 ierr = PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6775 } 6776 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6777 ierr = PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6778 6779 /* compute QR decomposition of constraints */ 6780 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6781 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6782 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6783 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6784 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6785 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6786 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6787 6788 /* explictly compute R^-T */ 6789 ierr = PetscArrayzero(trs_rhs,primal_dofs*primal_dofs);CHKERRQ(ierr); 6790 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6791 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6792 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6793 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6794 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6795 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6796 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6797 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6798 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6799 6800 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6801 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6802 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6803 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6804 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6805 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6806 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6807 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6808 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6809 6810 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6811 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6812 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6813 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6814 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6815 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6816 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6817 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6818 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6819 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6820 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6821 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6822 ierr = PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);CHKERRQ(ierr); 6823 6824 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6825 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6826 /* insert cols for primal dofs */ 6827 for (j=0;j<primal_dofs;j++) { 6828 start_vals = &qr_basis[j*size_of_constraint]; 6829 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6830 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6831 } 6832 /* insert cols for dual dofs */ 6833 for (j=0,k=0;j<dual_dofs;k++) { 6834 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6835 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6836 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6837 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6838 j++; 6839 } 6840 } 6841 6842 /* check change of basis */ 6843 if (pcbddc->dbg_flag) { 6844 PetscInt ii,jj; 6845 PetscBool valid_qr=PETSC_TRUE; 6846 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6847 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6848 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6849 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6850 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6851 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6852 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6853 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6854 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6855 for (jj=0;jj<size_of_constraint;jj++) { 6856 for (ii=0;ii<primal_dofs;ii++) { 6857 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6858 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6859 } 6860 } 6861 if (!valid_qr) { 6862 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6863 for (jj=0;jj<size_of_constraint;jj++) { 6864 for (ii=0;ii<primal_dofs;ii++) { 6865 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6866 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6867 } 6868 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6869 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6870 } 6871 } 6872 } 6873 } else { 6874 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6875 } 6876 } 6877 } else { /* simple transformation block */ 6878 PetscInt row,col; 6879 PetscScalar val,norm; 6880 6881 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6882 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6883 for (j=0;j<size_of_constraint;j++) { 6884 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6885 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6886 if (!PetscBTLookup(is_primal,row_B)) { 6887 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6888 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6889 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6890 } else { 6891 for (k=0;k<size_of_constraint;k++) { 6892 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6893 if (row != col) { 6894 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6895 } else { 6896 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6897 } 6898 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6899 } 6900 } 6901 } 6902 if (pcbddc->dbg_flag) { 6903 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6904 } 6905 } 6906 } else { 6907 if (pcbddc->dbg_flag) { 6908 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6909 } 6910 } 6911 } 6912 6913 /* free workspace */ 6914 if (qr_needed) { 6915 if (pcbddc->dbg_flag) { 6916 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6917 } 6918 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6919 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6920 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6921 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6922 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6923 } 6924 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6925 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6926 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6927 6928 /* assembling of global change of variable */ 6929 if (!pcbddc->fake_change) { 6930 Mat tmat; 6931 PetscInt bs; 6932 6933 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6934 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6935 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6936 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6937 ierr = MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6938 ierr = MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6939 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6940 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6941 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6942 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6943 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6944 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6945 ierr = MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6946 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6947 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6948 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6949 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6950 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6951 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6952 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6953 6954 /* check */ 6955 if (pcbddc->dbg_flag) { 6956 PetscReal error; 6957 Vec x,x_change; 6958 6959 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6960 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6961 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6962 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6963 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6964 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6965 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6966 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6967 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6968 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6969 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6970 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6971 if (error > PETSC_SMALL) { 6972 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 6973 } 6974 ierr = VecDestroy(&x);CHKERRQ(ierr); 6975 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6976 } 6977 /* adapt sub_schurs computed (if any) */ 6978 if (pcbddc->use_deluxe_scaling) { 6979 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6980 6981 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6982 if (sub_schurs && sub_schurs->S_Ej_all) { 6983 Mat S_new,tmat; 6984 IS is_all_N,is_V_Sall = NULL; 6985 6986 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6987 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6988 if (pcbddc->deluxe_zerorows) { 6989 ISLocalToGlobalMapping NtoSall; 6990 IS is_V; 6991 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6992 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6993 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6994 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6995 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6996 } 6997 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6998 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6999 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 7000 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 7001 if (pcbddc->deluxe_zerorows) { 7002 const PetscScalar *array; 7003 const PetscInt *idxs_V,*idxs_all; 7004 PetscInt i,n_V; 7005 7006 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 7007 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 7008 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 7009 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 7010 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 7011 for (i=0;i<n_V;i++) { 7012 PetscScalar val; 7013 PetscInt idx; 7014 7015 idx = idxs_V[i]; 7016 val = array[idxs_all[idxs_V[i]]]; 7017 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 7018 } 7019 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7020 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7021 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 7022 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 7023 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 7024 } 7025 sub_schurs->S_Ej_all = S_new; 7026 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 7027 if (sub_schurs->sum_S_Ej_all) { 7028 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 7029 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 7030 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 7031 if (pcbddc->deluxe_zerorows) { 7032 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 7033 } 7034 sub_schurs->sum_S_Ej_all = S_new; 7035 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 7036 } 7037 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 7038 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 7039 } 7040 /* destroy any change of basis context in sub_schurs */ 7041 if (sub_schurs && sub_schurs->change) { 7042 PetscInt i; 7043 7044 for (i=0;i<sub_schurs->n_subs;i++) { 7045 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 7046 } 7047 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 7048 } 7049 } 7050 if (pcbddc->switch_static) { /* need to save the local change */ 7051 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7052 } else { 7053 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 7054 } 7055 /* determine if any process has changed the pressures locally */ 7056 pcbddc->change_interior = pcbddc->benign_have_null; 7057 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7058 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 7059 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7060 pcbddc->use_qr_single = qr_needed; 7061 } 7062 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7063 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7064 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 7065 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7066 } else { 7067 Mat benign_global = NULL; 7068 if (pcbddc->benign_have_null) { 7069 Mat M; 7070 7071 pcbddc->change_interior = PETSC_TRUE; 7072 ierr = VecCopy(matis->counter,pcis->vec1_N);CHKERRQ(ierr); 7073 ierr = VecReciprocal(pcis->vec1_N);CHKERRQ(ierr); 7074 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);CHKERRQ(ierr); 7075 if (pcbddc->benign_change) { 7076 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 7077 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 7078 } else { 7079 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);CHKERRQ(ierr); 7080 ierr = MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);CHKERRQ(ierr); 7081 } 7082 ierr = MatISSetLocalMat(benign_global,M);CHKERRQ(ierr); 7083 ierr = MatDestroy(&M);CHKERRQ(ierr); 7084 ierr = MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7085 ierr = MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7086 } 7087 if (pcbddc->user_ChangeOfBasisMatrix) { 7088 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 7089 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 7090 } else if (pcbddc->benign_have_null) { 7091 pcbddc->ChangeOfBasisMatrix = benign_global; 7092 } 7093 } 7094 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7095 IS is_global; 7096 const PetscInt *gidxs; 7097 7098 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 7099 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 7100 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 7101 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 7102 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 7103 } 7104 } 7105 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 7106 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 7107 } 7108 7109 if (!pcbddc->fake_change) { 7110 /* add pressure dofs to set of primal nodes for numbering purposes */ 7111 for (i=0;i<pcbddc->benign_n;i++) { 7112 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7113 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7114 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7115 pcbddc->local_primal_size_cc++; 7116 pcbddc->local_primal_size++; 7117 } 7118 7119 /* check if a new primal space has been introduced (also take into account benign trick) */ 7120 pcbddc->new_primal_space_local = PETSC_TRUE; 7121 if (olocal_primal_size == pcbddc->local_primal_size) { 7122 ierr = PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 7123 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7124 if (!pcbddc->new_primal_space_local) { 7125 ierr = PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 7126 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7127 } 7128 } 7129 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7130 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7131 } 7132 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 7133 7134 /* flush dbg viewer */ 7135 if (pcbddc->dbg_flag) { 7136 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7137 } 7138 7139 /* free workspace */ 7140 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 7141 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 7142 if (!pcbddc->adaptive_selection) { 7143 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 7144 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 7145 } else { 7146 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 7147 pcbddc->adaptive_constraints_idxs_ptr, 7148 pcbddc->adaptive_constraints_data_ptr, 7149 pcbddc->adaptive_constraints_idxs, 7150 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 7151 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 7152 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 7153 } 7154 PetscFunctionReturn(0); 7155 } 7156 7157 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7158 { 7159 ISLocalToGlobalMapping map; 7160 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7161 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 7162 PetscInt i,N; 7163 PetscBool rcsr = PETSC_FALSE; 7164 PetscErrorCode ierr; 7165 7166 PetscFunctionBegin; 7167 if (pcbddc->recompute_topography) { 7168 pcbddc->graphanalyzed = PETSC_FALSE; 7169 /* Reset previously computed graph */ 7170 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 7171 /* Init local Graph struct */ 7172 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 7173 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 7174 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 7175 7176 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 7177 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 7178 } 7179 /* Check validity of the csr graph passed in by the user */ 7180 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 7181 7182 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7183 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7184 PetscInt *xadj,*adjncy; 7185 PetscInt nvtxs; 7186 PetscBool flg_row=PETSC_FALSE; 7187 7188 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 7189 if (flg_row) { 7190 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 7191 pcbddc->computed_rowadj = PETSC_TRUE; 7192 } 7193 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 7194 rcsr = PETSC_TRUE; 7195 } 7196 if (pcbddc->dbg_flag) { 7197 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7198 } 7199 7200 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7201 PetscReal *lcoords; 7202 PetscInt n; 7203 MPI_Datatype dimrealtype; 7204 7205 /* TODO: support for blocked */ 7206 if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 7207 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 7208 ierr = PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);CHKERRQ(ierr); 7209 ierr = MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);CHKERRQ(ierr); 7210 ierr = MPI_Type_commit(&dimrealtype);CHKERRQ(ierr); 7211 ierr = PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 7212 ierr = PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 7213 ierr = MPI_Type_free(&dimrealtype);CHKERRQ(ierr); 7214 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 7215 7216 pcbddc->mat_graph->coords = lcoords; 7217 pcbddc->mat_graph->cloc = PETSC_TRUE; 7218 pcbddc->mat_graph->cnloc = n; 7219 } 7220 if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 7221 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected); 7222 7223 /* Setup of Graph */ 7224 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 7225 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 7226 7227 /* attach info on disconnected subdomains if present */ 7228 if (pcbddc->n_local_subs) { 7229 PetscInt *local_subs,n,totn; 7230 7231 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 7232 ierr = PetscMalloc1(n,&local_subs);CHKERRQ(ierr); 7233 for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs; 7234 for (i=0;i<pcbddc->n_local_subs;i++) { 7235 const PetscInt *idxs; 7236 PetscInt nl,j; 7237 7238 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 7239 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 7240 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 7241 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 7242 } 7243 for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]); 7244 pcbddc->mat_graph->n_local_subs = totn + 1; 7245 pcbddc->mat_graph->local_subs = local_subs; 7246 } 7247 } 7248 7249 if (!pcbddc->graphanalyzed) { 7250 /* Graph's connected components analysis */ 7251 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 7252 pcbddc->graphanalyzed = PETSC_TRUE; 7253 pcbddc->corner_selected = pcbddc->corner_selection; 7254 } 7255 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7256 PetscFunctionReturn(0); 7257 } 7258 7259 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7260 { 7261 PetscInt i,j,n; 7262 PetscScalar *alphas; 7263 PetscReal norm,*onorms; 7264 PetscErrorCode ierr; 7265 7266 PetscFunctionBegin; 7267 n = *nio; 7268 if (!n) PetscFunctionReturn(0); 7269 ierr = PetscMalloc2(n,&alphas,n,&onorms);CHKERRQ(ierr); 7270 ierr = VecNormalize(vecs[0],&norm);CHKERRQ(ierr); 7271 if (norm < PETSC_SMALL) { 7272 onorms[0] = 0.0; 7273 ierr = VecSet(vecs[0],0.0);CHKERRQ(ierr); 7274 } else { 7275 onorms[0] = norm; 7276 } 7277 7278 for (i=1;i<n;i++) { 7279 ierr = VecMDot(vecs[i],i,vecs,alphas);CHKERRQ(ierr); 7280 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 7281 ierr = VecMAXPY(vecs[i],i,alphas,vecs);CHKERRQ(ierr); 7282 ierr = VecNormalize(vecs[i],&norm);CHKERRQ(ierr); 7283 if (norm < PETSC_SMALL) { 7284 onorms[i] = 0.0; 7285 ierr = VecSet(vecs[i],0.0);CHKERRQ(ierr); 7286 } else { 7287 onorms[i] = norm; 7288 } 7289 } 7290 /* push nonzero vectors at the beginning */ 7291 for (i=0;i<n;i++) { 7292 if (onorms[i] == 0.0) { 7293 for (j=i+1;j<n;j++) { 7294 if (onorms[j] != 0.0) { 7295 ierr = VecCopy(vecs[j],vecs[i]);CHKERRQ(ierr); 7296 onorms[j] = 0.0; 7297 } 7298 } 7299 } 7300 } 7301 for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7302 ierr = PetscFree2(alphas,onorms);CHKERRQ(ierr); 7303 PetscFunctionReturn(0); 7304 } 7305 7306 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7307 { 7308 Mat A; 7309 PetscInt n_neighs,*neighs,*n_shared,**shared; 7310 PetscMPIInt size,rank,color; 7311 PetscInt *xadj,*adjncy; 7312 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7313 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7314 PetscInt void_procs,*procs_candidates = NULL; 7315 PetscInt xadj_count,*count; 7316 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7317 PetscSubcomm psubcomm; 7318 MPI_Comm subcomm; 7319 PetscErrorCode ierr; 7320 7321 PetscFunctionBegin; 7322 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7323 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7324 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7325 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7326 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7327 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains); 7328 7329 if (have_void) *have_void = PETSC_FALSE; 7330 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 7331 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 7332 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 7333 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 7334 im_active = !!n; 7335 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7336 void_procs = size - active_procs; 7337 /* get ranks of of non-active processes in mat communicator */ 7338 if (void_procs) { 7339 PetscInt ncand; 7340 7341 if (have_void) *have_void = PETSC_TRUE; 7342 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 7343 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7344 for (i=0,ncand=0;i<size;i++) { 7345 if (!procs_candidates[i]) { 7346 procs_candidates[ncand++] = i; 7347 } 7348 } 7349 /* force n_subdomains to be not greater that the number of non-active processes */ 7350 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7351 } 7352 7353 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7354 number of subdomains requested 1 -> send to master or first candidate in voids */ 7355 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 7356 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7357 PetscInt issize,isidx,dest; 7358 if (*n_subdomains == 1) dest = 0; 7359 else dest = rank; 7360 if (im_active) { 7361 issize = 1; 7362 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7363 isidx = procs_candidates[dest]; 7364 } else { 7365 isidx = dest; 7366 } 7367 } else { 7368 issize = 0; 7369 isidx = -1; 7370 } 7371 if (*n_subdomains != 1) *n_subdomains = active_procs; 7372 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 7373 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7374 PetscFunctionReturn(0); 7375 } 7376 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 7377 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 7378 threshold = PetscMax(threshold,2); 7379 7380 /* Get info on mapping */ 7381 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7382 7383 /* build local CSR graph of subdomains' connectivity */ 7384 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 7385 xadj[0] = 0; 7386 xadj[1] = PetscMax(n_neighs-1,0); 7387 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 7388 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 7389 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 7390 for (i=1;i<n_neighs;i++) 7391 for (j=0;j<n_shared[i];j++) 7392 count[shared[i][j]] += 1; 7393 7394 xadj_count = 0; 7395 for (i=1;i<n_neighs;i++) { 7396 for (j=0;j<n_shared[i];j++) { 7397 if (count[shared[i][j]] < threshold) { 7398 adjncy[xadj_count] = neighs[i]; 7399 adjncy_wgt[xadj_count] = n_shared[i]; 7400 xadj_count++; 7401 break; 7402 } 7403 } 7404 } 7405 xadj[1] = xadj_count; 7406 ierr = PetscFree(count);CHKERRQ(ierr); 7407 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7408 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7409 7410 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 7411 7412 /* Restrict work on active processes only */ 7413 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 7414 if (void_procs) { 7415 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 7416 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 7417 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 7418 subcomm = PetscSubcommChild(psubcomm); 7419 } else { 7420 psubcomm = NULL; 7421 subcomm = PetscObjectComm((PetscObject)mat); 7422 } 7423 7424 v_wgt = NULL; 7425 if (!color) { 7426 ierr = PetscFree(xadj);CHKERRQ(ierr); 7427 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7428 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7429 } else { 7430 Mat subdomain_adj; 7431 IS new_ranks,new_ranks_contig; 7432 MatPartitioning partitioner; 7433 PetscInt rstart=0,rend=0; 7434 PetscInt *is_indices,*oldranks; 7435 PetscMPIInt size; 7436 PetscBool aggregate; 7437 7438 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 7439 if (void_procs) { 7440 PetscInt prank = rank; 7441 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 7442 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 7443 for (i=0;i<xadj[1];i++) { 7444 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 7445 } 7446 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7447 } else { 7448 oldranks = NULL; 7449 } 7450 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7451 if (aggregate) { /* TODO: all this part could be made more efficient */ 7452 PetscInt lrows,row,ncols,*cols; 7453 PetscMPIInt nrank; 7454 PetscScalar *vals; 7455 7456 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 7457 lrows = 0; 7458 if (nrank<redprocs) { 7459 lrows = size/redprocs; 7460 if (nrank<size%redprocs) lrows++; 7461 } 7462 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 7463 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 7464 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7465 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7466 row = nrank; 7467 ncols = xadj[1]-xadj[0]; 7468 cols = adjncy; 7469 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 7470 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7471 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 7472 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7473 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7474 ierr = PetscFree(xadj);CHKERRQ(ierr); 7475 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7476 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7477 ierr = PetscFree(vals);CHKERRQ(ierr); 7478 if (use_vwgt) { 7479 Vec v; 7480 const PetscScalar *array; 7481 PetscInt nl; 7482 7483 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 7484 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 7485 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 7486 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 7487 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 7488 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 7489 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 7490 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7491 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 7492 ierr = VecDestroy(&v);CHKERRQ(ierr); 7493 } 7494 } else { 7495 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 7496 if (use_vwgt) { 7497 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 7498 v_wgt[0] = n; 7499 } 7500 } 7501 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 7502 7503 /* Partition */ 7504 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 7505 #if defined(PETSC_HAVE_PTSCOTCH) 7506 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH);CHKERRQ(ierr); 7507 #elif defined(PETSC_HAVE_PARMETIS) 7508 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS);CHKERRQ(ierr); 7509 #else 7510 ierr = MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE);CHKERRQ(ierr); 7511 #endif 7512 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 7513 if (v_wgt) { 7514 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 7515 } 7516 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7517 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 7518 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 7519 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 7520 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 7521 7522 /* renumber new_ranks to avoid "holes" in new set of processors */ 7523 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 7524 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 7525 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7526 if (!aggregate) { 7527 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7528 if (PetscUnlikelyDebug(!oldranks)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7529 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7530 } else if (oldranks) { 7531 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7532 } else { 7533 ranks_send_to_idx[0] = is_indices[0]; 7534 } 7535 } else { 7536 PetscInt idx = 0; 7537 PetscMPIInt tag; 7538 MPI_Request *reqs; 7539 7540 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 7541 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 7542 for (i=rstart;i<rend;i++) { 7543 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 7544 } 7545 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 7546 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7547 ierr = PetscFree(reqs);CHKERRQ(ierr); 7548 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7549 if (PetscUnlikelyDebug(!oldranks)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7550 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7551 } else if (oldranks) { 7552 ranks_send_to_idx[0] = oldranks[idx]; 7553 } else { 7554 ranks_send_to_idx[0] = idx; 7555 } 7556 } 7557 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7558 /* clean up */ 7559 ierr = PetscFree(oldranks);CHKERRQ(ierr); 7560 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 7561 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 7562 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7563 } 7564 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7565 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7566 7567 /* assemble parallel IS for sends */ 7568 i = 1; 7569 if (!color) i=0; 7570 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7571 PetscFunctionReturn(0); 7572 } 7573 7574 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7575 7576 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7577 { 7578 Mat local_mat; 7579 IS is_sends_internal; 7580 PetscInt rows,cols,new_local_rows; 7581 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7582 PetscBool ismatis,isdense,newisdense,destroy_mat; 7583 ISLocalToGlobalMapping l2gmap; 7584 PetscInt* l2gmap_indices; 7585 const PetscInt* is_indices; 7586 MatType new_local_type; 7587 /* buffers */ 7588 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7589 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7590 PetscInt *recv_buffer_idxs_local; 7591 PetscScalar *ptr_vals,*recv_buffer_vals; 7592 const PetscScalar *send_buffer_vals; 7593 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7594 /* MPI */ 7595 MPI_Comm comm,comm_n; 7596 PetscSubcomm subcomm; 7597 PetscMPIInt n_sends,n_recvs,size; 7598 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7599 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7600 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7601 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7602 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7603 PetscErrorCode ierr; 7604 7605 PetscFunctionBegin; 7606 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7607 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7608 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7609 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7610 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7611 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7612 PetscValidLogicalCollectiveBool(mat,reuse,6); 7613 PetscValidLogicalCollectiveInt(mat,nis,8); 7614 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7615 if (nvecs) { 7616 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7617 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7618 } 7619 /* further checks */ 7620 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7621 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7622 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7623 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7624 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7625 if (reuse && *mat_n) { 7626 PetscInt mrows,mcols,mnrows,mncols; 7627 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7628 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7629 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7630 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7631 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7632 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7633 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7634 } 7635 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7636 PetscValidLogicalCollectiveInt(mat,bs,0); 7637 7638 /* prepare IS for sending if not provided */ 7639 if (!is_sends) { 7640 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7641 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7642 } else { 7643 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7644 is_sends_internal = is_sends; 7645 } 7646 7647 /* get comm */ 7648 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7649 7650 /* compute number of sends */ 7651 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7652 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7653 7654 /* compute number of receives */ 7655 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 7656 ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr); 7657 ierr = PetscArrayzero(iflags,size);CHKERRQ(ierr); 7658 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7659 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7660 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7661 ierr = PetscFree(iflags);CHKERRQ(ierr); 7662 7663 /* restrict comm if requested */ 7664 subcomm = NULL; 7665 destroy_mat = PETSC_FALSE; 7666 if (restrict_comm) { 7667 PetscMPIInt color,subcommsize; 7668 7669 color = 0; 7670 if (restrict_full) { 7671 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7672 } else { 7673 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7674 } 7675 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7676 subcommsize = size - subcommsize; 7677 /* check if reuse has been requested */ 7678 if (reuse) { 7679 if (*mat_n) { 7680 PetscMPIInt subcommsize2; 7681 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7682 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7683 comm_n = PetscObjectComm((PetscObject)*mat_n); 7684 } else { 7685 comm_n = PETSC_COMM_SELF; 7686 } 7687 } else { /* MAT_INITIAL_MATRIX */ 7688 PetscMPIInt rank; 7689 7690 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7691 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7692 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7693 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7694 comm_n = PetscSubcommChild(subcomm); 7695 } 7696 /* flag to destroy *mat_n if not significative */ 7697 if (color) destroy_mat = PETSC_TRUE; 7698 } else { 7699 comm_n = comm; 7700 } 7701 7702 /* prepare send/receive buffers */ 7703 ierr = PetscMalloc1(size,&ilengths_idxs);CHKERRQ(ierr); 7704 ierr = PetscArrayzero(ilengths_idxs,size);CHKERRQ(ierr); 7705 ierr = PetscMalloc1(size,&ilengths_vals);CHKERRQ(ierr); 7706 ierr = PetscArrayzero(ilengths_vals,size);CHKERRQ(ierr); 7707 if (nis) { 7708 ierr = PetscCalloc1(size,&ilengths_idxs_is);CHKERRQ(ierr); 7709 } 7710 7711 /* Get data from local matrices */ 7712 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7713 /* TODO: See below some guidelines on how to prepare the local buffers */ 7714 /* 7715 send_buffer_vals should contain the raw values of the local matrix 7716 send_buffer_idxs should contain: 7717 - MatType_PRIVATE type 7718 - PetscInt size_of_l2gmap 7719 - PetscInt global_row_indices[size_of_l2gmap] 7720 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7721 */ 7722 else { 7723 ierr = MatDenseGetArrayRead(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7724 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7725 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7726 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7727 send_buffer_idxs[1] = i; 7728 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7729 ierr = PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i);CHKERRQ(ierr); 7730 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7731 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7732 for (i=0;i<n_sends;i++) { 7733 ilengths_vals[is_indices[i]] = len*len; 7734 ilengths_idxs[is_indices[i]] = len+2; 7735 } 7736 } 7737 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7738 /* additional is (if any) */ 7739 if (nis) { 7740 PetscMPIInt psum; 7741 PetscInt j; 7742 for (j=0,psum=0;j<nis;j++) { 7743 PetscInt plen; 7744 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7745 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7746 psum += len+1; /* indices + lenght */ 7747 } 7748 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7749 for (j=0,psum=0;j<nis;j++) { 7750 PetscInt plen; 7751 const PetscInt *is_array_idxs; 7752 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7753 send_buffer_idxs_is[psum] = plen; 7754 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7755 ierr = PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen);CHKERRQ(ierr); 7756 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7757 psum += plen+1; /* indices + lenght */ 7758 } 7759 for (i=0;i<n_sends;i++) { 7760 ilengths_idxs_is[is_indices[i]] = psum; 7761 } 7762 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7763 } 7764 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7765 7766 buf_size_idxs = 0; 7767 buf_size_vals = 0; 7768 buf_size_idxs_is = 0; 7769 buf_size_vecs = 0; 7770 for (i=0;i<n_recvs;i++) { 7771 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7772 buf_size_vals += (PetscInt)olengths_vals[i]; 7773 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7774 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7775 } 7776 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7777 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7778 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7779 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7780 7781 /* get new tags for clean communications */ 7782 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7783 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7784 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7785 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7786 7787 /* allocate for requests */ 7788 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7789 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7790 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7791 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7792 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7793 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7794 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7795 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7796 7797 /* communications */ 7798 ptr_idxs = recv_buffer_idxs; 7799 ptr_vals = recv_buffer_vals; 7800 ptr_idxs_is = recv_buffer_idxs_is; 7801 ptr_vecs = recv_buffer_vecs; 7802 for (i=0;i<n_recvs;i++) { 7803 source_dest = onodes[i]; 7804 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7805 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7806 ptr_idxs += olengths_idxs[i]; 7807 ptr_vals += olengths_vals[i]; 7808 if (nis) { 7809 source_dest = onodes_is[i]; 7810 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7811 ptr_idxs_is += olengths_idxs_is[i]; 7812 } 7813 if (nvecs) { 7814 source_dest = onodes[i]; 7815 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7816 ptr_vecs += olengths_idxs[i]-2; 7817 } 7818 } 7819 for (i=0;i<n_sends;i++) { 7820 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7821 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7822 ierr = MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7823 if (nis) { 7824 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7825 } 7826 if (nvecs) { 7827 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7828 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7829 } 7830 } 7831 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7832 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7833 7834 /* assemble new l2g map */ 7835 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7836 ptr_idxs = recv_buffer_idxs; 7837 new_local_rows = 0; 7838 for (i=0;i<n_recvs;i++) { 7839 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7840 ptr_idxs += olengths_idxs[i]; 7841 } 7842 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7843 ptr_idxs = recv_buffer_idxs; 7844 new_local_rows = 0; 7845 for (i=0;i<n_recvs;i++) { 7846 ierr = PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1));CHKERRQ(ierr); 7847 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7848 ptr_idxs += olengths_idxs[i]; 7849 } 7850 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7851 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7852 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7853 7854 /* infer new local matrix type from received local matrices type */ 7855 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7856 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7857 if (n_recvs) { 7858 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7859 ptr_idxs = recv_buffer_idxs; 7860 for (i=0;i<n_recvs;i++) { 7861 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7862 new_local_type_private = MATAIJ_PRIVATE; 7863 break; 7864 } 7865 ptr_idxs += olengths_idxs[i]; 7866 } 7867 switch (new_local_type_private) { 7868 case MATDENSE_PRIVATE: 7869 new_local_type = MATSEQAIJ; 7870 bs = 1; 7871 break; 7872 case MATAIJ_PRIVATE: 7873 new_local_type = MATSEQAIJ; 7874 bs = 1; 7875 break; 7876 case MATBAIJ_PRIVATE: 7877 new_local_type = MATSEQBAIJ; 7878 break; 7879 case MATSBAIJ_PRIVATE: 7880 new_local_type = MATSEQSBAIJ; 7881 break; 7882 default: 7883 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7884 break; 7885 } 7886 } else { /* by default, new_local_type is seqaij */ 7887 new_local_type = MATSEQAIJ; 7888 bs = 1; 7889 } 7890 7891 /* create MATIS object if needed */ 7892 if (!reuse) { 7893 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7894 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7895 } else { 7896 /* it also destroys the local matrices */ 7897 if (*mat_n) { 7898 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7899 } else { /* this is a fake object */ 7900 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7901 } 7902 } 7903 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7904 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7905 7906 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7907 7908 /* Global to local map of received indices */ 7909 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7910 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7911 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7912 7913 /* restore attributes -> type of incoming data and its size */ 7914 buf_size_idxs = 0; 7915 for (i=0;i<n_recvs;i++) { 7916 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7917 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7918 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7919 } 7920 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7921 7922 /* set preallocation */ 7923 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7924 if (!newisdense) { 7925 PetscInt *new_local_nnz=NULL; 7926 7927 ptr_idxs = recv_buffer_idxs_local; 7928 if (n_recvs) { 7929 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7930 } 7931 for (i=0;i<n_recvs;i++) { 7932 PetscInt j; 7933 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7934 for (j=0;j<*(ptr_idxs+1);j++) { 7935 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7936 } 7937 } else { 7938 /* TODO */ 7939 } 7940 ptr_idxs += olengths_idxs[i]; 7941 } 7942 if (new_local_nnz) { 7943 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7944 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7945 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7946 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7947 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7948 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7949 } else { 7950 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7951 } 7952 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7953 } else { 7954 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7955 } 7956 7957 /* set values */ 7958 ptr_vals = recv_buffer_vals; 7959 ptr_idxs = recv_buffer_idxs_local; 7960 for (i=0;i<n_recvs;i++) { 7961 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7962 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7963 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7964 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7965 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7966 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7967 } else { 7968 /* TODO */ 7969 } 7970 ptr_idxs += olengths_idxs[i]; 7971 ptr_vals += olengths_vals[i]; 7972 } 7973 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7974 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7975 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7976 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7977 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7978 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7979 7980 #if 0 7981 if (!restrict_comm) { /* check */ 7982 Vec lvec,rvec; 7983 PetscReal infty_error; 7984 7985 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7986 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7987 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7988 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7989 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7990 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7991 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7992 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7993 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7994 } 7995 #endif 7996 7997 /* assemble new additional is (if any) */ 7998 if (nis) { 7999 PetscInt **temp_idxs,*count_is,j,psum; 8000 8001 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8002 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 8003 ptr_idxs = recv_buffer_idxs_is; 8004 psum = 0; 8005 for (i=0;i<n_recvs;i++) { 8006 for (j=0;j<nis;j++) { 8007 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8008 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 8009 psum += plen; 8010 ptr_idxs += plen+1; /* shift pointer to received data */ 8011 } 8012 } 8013 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 8014 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 8015 for (i=1;i<nis;i++) { 8016 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 8017 } 8018 ierr = PetscArrayzero(count_is,nis);CHKERRQ(ierr); 8019 ptr_idxs = recv_buffer_idxs_is; 8020 for (i=0;i<n_recvs;i++) { 8021 for (j=0;j<nis;j++) { 8022 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8023 ierr = PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen);CHKERRQ(ierr); 8024 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 8025 ptr_idxs += plen+1; /* shift pointer to received data */ 8026 } 8027 } 8028 for (i=0;i<nis;i++) { 8029 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8030 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr); 8031 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 8032 } 8033 ierr = PetscFree(count_is);CHKERRQ(ierr); 8034 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 8035 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 8036 } 8037 /* free workspace */ 8038 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 8039 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8040 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 8041 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8042 if (isdense) { 8043 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 8044 ierr = MatDenseRestoreArrayRead(local_mat,&send_buffer_vals);CHKERRQ(ierr); 8045 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 8046 } else { 8047 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 8048 } 8049 if (nis) { 8050 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8051 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 8052 } 8053 8054 if (nvecs) { 8055 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8056 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8057 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8058 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 8059 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 8060 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 8061 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 8062 /* set values */ 8063 ptr_vals = recv_buffer_vecs; 8064 ptr_idxs = recv_buffer_idxs_local; 8065 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8066 for (i=0;i<n_recvs;i++) { 8067 PetscInt j; 8068 for (j=0;j<*(ptr_idxs+1);j++) { 8069 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 8070 } 8071 ptr_idxs += olengths_idxs[i]; 8072 ptr_vals += olengths_idxs[i]-2; 8073 } 8074 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 8075 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 8076 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 8077 } 8078 8079 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 8080 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 8081 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 8082 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 8083 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 8084 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 8085 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 8086 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 8087 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 8088 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 8089 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 8090 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 8091 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 8092 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 8093 ierr = PetscFree(onodes);CHKERRQ(ierr); 8094 if (nis) { 8095 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 8096 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 8097 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 8098 } 8099 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 8100 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 8101 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 8102 for (i=0;i<nis;i++) { 8103 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8104 } 8105 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8106 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 8107 } 8108 *mat_n = NULL; 8109 } 8110 PetscFunctionReturn(0); 8111 } 8112 8113 /* temporary hack into ksp private data structure */ 8114 #include <petsc/private/kspimpl.h> 8115 8116 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 8117 { 8118 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 8119 PC_IS *pcis = (PC_IS*)pc->data; 8120 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 8121 Mat coarsedivudotp = NULL; 8122 Mat coarseG,t_coarse_mat_is; 8123 MatNullSpace CoarseNullSpace = NULL; 8124 ISLocalToGlobalMapping coarse_islg; 8125 IS coarse_is,*isarray,corners; 8126 PetscInt i,im_active=-1,active_procs=-1; 8127 PetscInt nis,nisdofs,nisneu,nisvert; 8128 PetscInt coarse_eqs_per_proc; 8129 PC pc_temp; 8130 PCType coarse_pc_type; 8131 KSPType coarse_ksp_type; 8132 PetscBool multilevel_requested,multilevel_allowed; 8133 PetscBool coarse_reuse; 8134 PetscInt ncoarse,nedcfield; 8135 PetscBool compute_vecs = PETSC_FALSE; 8136 PetscScalar *array; 8137 MatReuse coarse_mat_reuse; 8138 PetscBool restr, full_restr, have_void; 8139 PetscMPIInt size; 8140 PetscErrorCode ierr; 8141 8142 PetscFunctionBegin; 8143 ierr = PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8144 /* Assign global numbering to coarse dofs */ 8145 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8146 PetscInt ocoarse_size; 8147 compute_vecs = PETSC_TRUE; 8148 8149 pcbddc->new_primal_space = PETSC_TRUE; 8150 ocoarse_size = pcbddc->coarse_size; 8151 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 8152 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 8153 /* see if we can avoid some work */ 8154 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8155 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8156 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8157 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 8158 coarse_reuse = PETSC_FALSE; 8159 } else { /* we can safely reuse already computed coarse matrix */ 8160 coarse_reuse = PETSC_TRUE; 8161 } 8162 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8163 coarse_reuse = PETSC_FALSE; 8164 } 8165 /* reset any subassembling information */ 8166 if (!coarse_reuse || pcbddc->recompute_topography) { 8167 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 8168 } 8169 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8170 coarse_reuse = PETSC_TRUE; 8171 } 8172 if (coarse_reuse && pcbddc->coarse_ksp) { 8173 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 8174 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 8175 coarse_mat_reuse = MAT_REUSE_MATRIX; 8176 } else { 8177 coarse_mat = NULL; 8178 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8179 } 8180 8181 /* creates temporary l2gmap and IS for coarse indexes */ 8182 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 8183 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 8184 8185 /* creates temporary MATIS object for coarse matrix */ 8186 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 8187 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 8188 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 8189 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8190 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8191 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 8192 8193 /* count "active" (i.e. with positive local size) and "void" processes */ 8194 im_active = !!(pcis->n); 8195 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8196 8197 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8198 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8199 /* full_restr : just use the receivers from the subassembling pattern */ 8200 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 8201 coarse_mat_is = NULL; 8202 multilevel_allowed = PETSC_FALSE; 8203 multilevel_requested = PETSC_FALSE; 8204 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 8205 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 8206 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8207 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8208 if (multilevel_requested) { 8209 ncoarse = active_procs/pcbddc->coarsening_ratio; 8210 restr = PETSC_FALSE; 8211 full_restr = PETSC_FALSE; 8212 } else { 8213 ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc); 8214 restr = PETSC_TRUE; 8215 full_restr = PETSC_TRUE; 8216 } 8217 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8218 ncoarse = PetscMax(1,ncoarse); 8219 if (!pcbddc->coarse_subassembling) { 8220 if (pcbddc->coarsening_ratio > 1) { 8221 if (multilevel_requested) { 8222 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 8223 } else { 8224 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 8225 } 8226 } else { 8227 PetscMPIInt rank; 8228 8229 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 8230 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 8231 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 8232 } 8233 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8234 PetscInt psum; 8235 if (pcbddc->coarse_ksp) psum = 1; 8236 else psum = 0; 8237 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8238 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8239 } 8240 /* determine if we can go multilevel */ 8241 if (multilevel_requested) { 8242 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8243 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8244 } 8245 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8246 8247 /* dump subassembling pattern */ 8248 if (pcbddc->dbg_flag && multilevel_allowed) { 8249 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 8250 } 8251 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8252 nedcfield = -1; 8253 corners = NULL; 8254 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8255 PetscInt *tidxs,*tidxs2,nout,tsize,i; 8256 const PetscInt *idxs; 8257 ISLocalToGlobalMapping tmap; 8258 8259 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8260 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 8261 /* allocate space for temporary storage */ 8262 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 8263 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 8264 /* allocate for IS array */ 8265 nisdofs = pcbddc->n_ISForDofsLocal; 8266 if (pcbddc->nedclocal) { 8267 if (pcbddc->nedfield > -1) { 8268 nedcfield = pcbddc->nedfield; 8269 } else { 8270 nedcfield = 0; 8271 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs); 8272 nisdofs = 1; 8273 } 8274 } 8275 nisneu = !!pcbddc->NeumannBoundariesLocal; 8276 nisvert = 0; /* nisvert is not used */ 8277 nis = nisdofs + nisneu + nisvert; 8278 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 8279 /* dofs splitting */ 8280 for (i=0;i<nisdofs;i++) { 8281 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 8282 if (nedcfield != i) { 8283 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 8284 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8285 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8286 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8287 } else { 8288 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 8289 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8290 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8291 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout); 8292 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8293 } 8294 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8295 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 8296 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 8297 } 8298 /* neumann boundaries */ 8299 if (pcbddc->NeumannBoundariesLocal) { 8300 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 8301 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 8302 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8303 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8304 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8305 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8306 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 8307 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 8308 } 8309 /* coordinates */ 8310 if (pcbddc->corner_selected) { 8311 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);CHKERRQ(ierr); 8312 ierr = ISGetLocalSize(corners,&tsize);CHKERRQ(ierr); 8313 ierr = ISGetIndices(corners,&idxs);CHKERRQ(ierr); 8314 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8315 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %D != %D",tsize,nout); 8316 ierr = ISRestoreIndices(corners,&idxs);CHKERRQ(ierr); 8317 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);CHKERRQ(ierr); 8318 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8319 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners);CHKERRQ(ierr); 8320 } 8321 ierr = PetscFree(tidxs);CHKERRQ(ierr); 8322 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 8323 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 8324 } else { 8325 nis = 0; 8326 nisdofs = 0; 8327 nisneu = 0; 8328 nisvert = 0; 8329 isarray = NULL; 8330 } 8331 /* destroy no longer needed map */ 8332 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 8333 8334 /* subassemble */ 8335 if (multilevel_allowed) { 8336 Vec vp[1]; 8337 PetscInt nvecs = 0; 8338 PetscBool reuse,reuser; 8339 8340 if (coarse_mat) reuse = PETSC_TRUE; 8341 else reuse = PETSC_FALSE; 8342 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8343 vp[0] = NULL; 8344 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8345 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 8346 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 8347 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 8348 nvecs = 1; 8349 8350 if (pcbddc->divudotp) { 8351 Mat B,loc_divudotp; 8352 Vec v,p; 8353 IS dummy; 8354 PetscInt np; 8355 8356 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 8357 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 8358 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 8359 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 8360 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 8361 ierr = VecSet(p,1.);CHKERRQ(ierr); 8362 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 8363 ierr = VecDestroy(&p);CHKERRQ(ierr); 8364 ierr = MatDestroy(&B);CHKERRQ(ierr); 8365 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 8366 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 8367 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 8368 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 8369 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 8370 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8371 ierr = VecDestroy(&v);CHKERRQ(ierr); 8372 } 8373 } 8374 if (reuser) { 8375 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8376 } else { 8377 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8378 } 8379 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8380 PetscScalar *arraym; 8381 const PetscScalar *arrayv; 8382 PetscInt nl; 8383 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 8384 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 8385 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8386 ierr = VecGetArrayRead(vp[0],&arrayv);CHKERRQ(ierr); 8387 ierr = PetscArraycpy(arraym,arrayv,nl);CHKERRQ(ierr); 8388 ierr = VecRestoreArrayRead(vp[0],&arrayv);CHKERRQ(ierr); 8389 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8390 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 8391 } else { 8392 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 8393 } 8394 } else { 8395 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 8396 } 8397 if (coarse_mat_is || coarse_mat) { 8398 if (!multilevel_allowed) { 8399 ierr = MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 8400 } else { 8401 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8402 if (coarse_mat_is) { 8403 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8404 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 8405 coarse_mat = coarse_mat_is; 8406 } 8407 } 8408 } 8409 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 8410 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 8411 8412 /* create local to global scatters for coarse problem */ 8413 if (compute_vecs) { 8414 PetscInt lrows; 8415 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 8416 if (coarse_mat) { 8417 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 8418 } else { 8419 lrows = 0; 8420 } 8421 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 8422 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 8423 ierr = VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);CHKERRQ(ierr); 8424 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8425 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8426 } 8427 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 8428 8429 /* set defaults for coarse KSP and PC */ 8430 if (multilevel_allowed) { 8431 coarse_ksp_type = KSPRICHARDSON; 8432 coarse_pc_type = PCBDDC; 8433 } else { 8434 coarse_ksp_type = KSPPREONLY; 8435 coarse_pc_type = PCREDUNDANT; 8436 } 8437 8438 /* print some info if requested */ 8439 if (pcbddc->dbg_flag) { 8440 if (!multilevel_allowed) { 8441 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8442 if (multilevel_requested) { 8443 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8444 } else if (pcbddc->max_levels) { 8445 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);CHKERRQ(ierr); 8446 } 8447 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8448 } 8449 } 8450 8451 /* communicate coarse discrete gradient */ 8452 coarseG = NULL; 8453 if (pcbddc->nedcG && multilevel_allowed) { 8454 MPI_Comm ccomm; 8455 if (coarse_mat) { 8456 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8457 } else { 8458 ccomm = MPI_COMM_NULL; 8459 } 8460 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 8461 } 8462 8463 /* create the coarse KSP object only once with defaults */ 8464 if (coarse_mat) { 8465 PetscBool isredundant,isbddc,force,valid; 8466 PetscViewer dbg_viewer = NULL; 8467 8468 if (pcbddc->dbg_flag) { 8469 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8470 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8471 } 8472 if (!pcbddc->coarse_ksp) { 8473 char prefix[256],str_level[16]; 8474 size_t len; 8475 8476 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 8477 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8478 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 8479 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 8480 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8481 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 8482 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 8483 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8484 /* TODO is this logic correct? should check for coarse_mat type */ 8485 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8486 /* prefix */ 8487 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 8488 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 8489 if (!pcbddc->current_level) { 8490 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));CHKERRQ(ierr); 8491 ierr = PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));CHKERRQ(ierr); 8492 } else { 8493 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 8494 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8495 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8496 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8497 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 8498 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 8499 ierr = PetscStrlcat(prefix,str_level,sizeof(prefix));CHKERRQ(ierr); 8500 } 8501 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 8502 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8503 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8504 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8505 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8506 /* allow user customization */ 8507 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 8508 /* get some info after set from options */ 8509 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8510 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8511 force = PETSC_FALSE; 8512 ierr = PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);CHKERRQ(ierr); 8513 ierr = PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");CHKERRQ(ierr); 8514 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8515 if (multilevel_allowed && !force && !valid) { 8516 isbddc = PETSC_TRUE; 8517 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8518 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8519 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8520 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8521 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8522 ierr = PetscObjectOptionsBegin((PetscObject)pc_temp);CHKERRQ(ierr); 8523 ierr = (*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp);CHKERRQ(ierr); 8524 ierr = PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp);CHKERRQ(ierr); 8525 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8526 pc_temp->setfromoptionscalled++; 8527 } 8528 } 8529 } 8530 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8531 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8532 if (nisdofs) { 8533 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 8534 for (i=0;i<nisdofs;i++) { 8535 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8536 } 8537 } 8538 if (nisneu) { 8539 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 8540 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 8541 } 8542 if (nisvert) { 8543 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 8544 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 8545 } 8546 if (coarseG) { 8547 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 8548 } 8549 8550 /* get some info after set from options */ 8551 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8552 8553 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8554 if (isbddc && !multilevel_allowed) { 8555 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8556 } 8557 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8558 force = PETSC_FALSE; 8559 ierr = PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);CHKERRQ(ierr); 8560 ierr = PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");CHKERRQ(ierr); 8561 if (multilevel_requested && multilevel_allowed && !valid && !force) { 8562 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8563 } 8564 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 8565 if (isredundant) { 8566 KSP inner_ksp; 8567 PC inner_pc; 8568 8569 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 8570 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 8571 } 8572 8573 /* parameters which miss an API */ 8574 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8575 if (isbddc) { 8576 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8577 8578 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8579 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8580 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8581 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8582 if (pcbddc_coarse->benign_saddle_point) { 8583 Mat coarsedivudotp_is; 8584 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8585 IS row,col; 8586 const PetscInt *gidxs; 8587 PetscInt n,st,M,N; 8588 8589 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 8590 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 8591 st = st-n; 8592 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 8593 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8594 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8595 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8596 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8597 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8598 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8599 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8600 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8601 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8602 ierr = ISDestroy(&row);CHKERRQ(ierr); 8603 ierr = ISDestroy(&col);CHKERRQ(ierr); 8604 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8605 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8606 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8607 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8608 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8609 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8610 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8611 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8612 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8613 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8614 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8615 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8616 } 8617 } 8618 8619 /* propagate symmetry info of coarse matrix */ 8620 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8621 if (pc->pmat->symmetric_set) { 8622 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8623 } 8624 if (pc->pmat->hermitian_set) { 8625 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8626 } 8627 if (pc->pmat->spd_set) { 8628 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8629 } 8630 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8631 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8632 } 8633 /* set operators */ 8634 ierr = MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");CHKERRQ(ierr); 8635 ierr = MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 8636 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8637 if (pcbddc->dbg_flag) { 8638 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8639 } 8640 } 8641 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8642 ierr = PetscFree(isarray);CHKERRQ(ierr); 8643 #if 0 8644 { 8645 PetscViewer viewer; 8646 char filename[256]; 8647 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8648 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8649 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8650 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8651 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8652 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8653 } 8654 #endif 8655 8656 if (corners) { 8657 Vec gv; 8658 IS is; 8659 const PetscInt *idxs; 8660 PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim; 8661 PetscScalar *coords; 8662 8663 if (!pcbddc->mat_graph->cloc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates"); 8664 ierr = VecGetSize(pcbddc->coarse_vec,&N);CHKERRQ(ierr); 8665 ierr = VecGetLocalSize(pcbddc->coarse_vec,&n);CHKERRQ(ierr); 8666 ierr = VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv);CHKERRQ(ierr); 8667 ierr = VecSetBlockSize(gv,cdim);CHKERRQ(ierr); 8668 ierr = VecSetSizes(gv,n*cdim,N*cdim);CHKERRQ(ierr); 8669 ierr = VecSetType(gv,VECSTANDARD);CHKERRQ(ierr); 8670 ierr = VecSetFromOptions(gv);CHKERRQ(ierr); 8671 ierr = VecSet(gv,PETSC_MAX_REAL);CHKERRQ(ierr); /* we only propagate coordinates from vertices constraints */ 8672 8673 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);CHKERRQ(ierr); 8674 ierr = ISGetLocalSize(is,&n);CHKERRQ(ierr); 8675 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 8676 ierr = PetscMalloc1(n*cdim,&coords);CHKERRQ(ierr); 8677 for (i=0;i<n;i++) { 8678 for (d=0;d<cdim;d++) { 8679 coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d]; 8680 } 8681 } 8682 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 8683 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);CHKERRQ(ierr); 8684 8685 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 8686 ierr = ISGetIndices(corners,&idxs);CHKERRQ(ierr); 8687 ierr = VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES);CHKERRQ(ierr); 8688 ierr = ISRestoreIndices(corners,&idxs);CHKERRQ(ierr); 8689 ierr = PetscFree(coords);CHKERRQ(ierr); 8690 ierr = VecAssemblyBegin(gv);CHKERRQ(ierr); 8691 ierr = VecAssemblyEnd(gv);CHKERRQ(ierr); 8692 ierr = VecGetArray(gv,&coords);CHKERRQ(ierr); 8693 if (pcbddc->coarse_ksp) { 8694 PC coarse_pc; 8695 PetscBool isbddc; 8696 8697 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 8698 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 8699 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8700 PetscReal *realcoords; 8701 8702 ierr = VecGetLocalSize(gv,&n);CHKERRQ(ierr); 8703 #if defined(PETSC_USE_COMPLEX) 8704 ierr = PetscMalloc1(n,&realcoords);CHKERRQ(ierr); 8705 for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]); 8706 #else 8707 realcoords = coords; 8708 #endif 8709 ierr = PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords);CHKERRQ(ierr); 8710 #if defined(PETSC_USE_COMPLEX) 8711 ierr = PetscFree(realcoords);CHKERRQ(ierr); 8712 #endif 8713 } 8714 } 8715 ierr = VecRestoreArray(gv,&coords);CHKERRQ(ierr); 8716 ierr = VecDestroy(&gv);CHKERRQ(ierr); 8717 } 8718 ierr = ISDestroy(&corners);CHKERRQ(ierr); 8719 8720 if (pcbddc->coarse_ksp) { 8721 Vec crhs,csol; 8722 8723 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8724 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8725 if (!csol) { 8726 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8727 } 8728 if (!crhs) { 8729 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8730 } 8731 } 8732 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8733 8734 /* compute null space for coarse solver if the benign trick has been requested */ 8735 if (pcbddc->benign_null) { 8736 8737 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8738 for (i=0;i<pcbddc->benign_n;i++) { 8739 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8740 } 8741 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8742 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8743 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8744 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8745 if (coarse_mat) { 8746 Vec nullv; 8747 PetscScalar *array,*array2; 8748 PetscInt nl; 8749 8750 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8751 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8752 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8753 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8754 ierr = PetscArraycpy(array2,array,nl);CHKERRQ(ierr); 8755 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8756 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8757 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8758 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8759 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8760 } 8761 } 8762 ierr = PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8763 8764 ierr = PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8765 if (pcbddc->coarse_ksp) { 8766 PetscBool ispreonly; 8767 8768 if (CoarseNullSpace) { 8769 PetscBool isnull; 8770 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8771 if (isnull) { 8772 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8773 } 8774 /* TODO: add local nullspaces (if any) */ 8775 } 8776 /* setup coarse ksp */ 8777 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8778 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8779 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8780 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8781 KSP check_ksp; 8782 KSPType check_ksp_type; 8783 PC check_pc; 8784 Vec check_vec,coarse_vec; 8785 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8786 PetscInt its; 8787 PetscBool compute_eigs; 8788 PetscReal *eigs_r,*eigs_c; 8789 PetscInt neigs; 8790 const char *prefix; 8791 8792 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8793 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8794 ierr = PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);CHKERRQ(ierr); 8795 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 8796 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8797 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8798 /* prevent from setup unneeded object */ 8799 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8800 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8801 if (ispreonly) { 8802 check_ksp_type = KSPPREONLY; 8803 compute_eigs = PETSC_FALSE; 8804 } else { 8805 check_ksp_type = KSPGMRES; 8806 compute_eigs = PETSC_TRUE; 8807 } 8808 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8809 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8810 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8811 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8812 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8813 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8814 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8815 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8816 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8817 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8818 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8819 /* create random vec */ 8820 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8821 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8822 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8823 /* solve coarse problem */ 8824 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8825 ierr = KSPCheckSolve(check_ksp,pc,coarse_vec);CHKERRQ(ierr); 8826 /* set eigenvalue estimation if preonly has not been requested */ 8827 if (compute_eigs) { 8828 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8829 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8830 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8831 if (neigs) { 8832 lambda_max = eigs_r[neigs-1]; 8833 lambda_min = eigs_r[0]; 8834 if (pcbddc->use_coarse_estimates) { 8835 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8836 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8837 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8838 } 8839 } 8840 } 8841 } 8842 8843 /* check coarse problem residual error */ 8844 if (pcbddc->dbg_flag) { 8845 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8846 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8847 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8848 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8849 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8850 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8851 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8852 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8853 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8854 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8855 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8856 if (CoarseNullSpace) { 8857 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8858 } 8859 if (compute_eigs) { 8860 PetscReal lambda_max_s,lambda_min_s; 8861 KSPConvergedReason reason; 8862 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8863 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8864 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8865 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8866 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8867 for (i=0;i<neigs;i++) { 8868 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8869 } 8870 } 8871 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8872 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8873 } 8874 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8875 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8876 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8877 if (compute_eigs) { 8878 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8879 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8880 } 8881 } 8882 } 8883 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8884 /* print additional info */ 8885 if (pcbddc->dbg_flag) { 8886 /* waits until all processes reaches this point */ 8887 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8888 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);CHKERRQ(ierr); 8889 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8890 } 8891 8892 /* free memory */ 8893 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8894 ierr = PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8895 PetscFunctionReturn(0); 8896 } 8897 8898 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8899 { 8900 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8901 PC_IS* pcis = (PC_IS*)pc->data; 8902 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8903 IS subset,subset_mult,subset_n; 8904 PetscInt local_size,coarse_size=0; 8905 PetscInt *local_primal_indices=NULL; 8906 const PetscInt *t_local_primal_indices; 8907 PetscErrorCode ierr; 8908 8909 PetscFunctionBegin; 8910 /* Compute global number of coarse dofs */ 8911 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8912 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8913 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8914 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8915 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8916 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8917 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8918 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8919 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8920 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8921 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8922 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8923 ierr = PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size);CHKERRQ(ierr); 8924 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8925 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8926 8927 /* check numbering */ 8928 if (pcbddc->dbg_flag) { 8929 PetscScalar coarsesum,*array,*array2; 8930 PetscInt i; 8931 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8932 8933 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8934 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8935 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8936 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8937 /* counter */ 8938 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8939 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8940 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8941 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8942 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8943 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8944 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8945 for (i=0;i<pcbddc->local_primal_size;i++) { 8946 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8947 } 8948 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8949 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8950 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8951 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8952 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8953 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8954 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8955 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8956 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8957 for (i=0;i<pcis->n;i++) { 8958 if (array[i] != 0.0 && array[i] != array2[i]) { 8959 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8960 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8961 set_error = PETSC_TRUE; 8962 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8963 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8964 } 8965 } 8966 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8967 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8968 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8969 for (i=0;i<pcis->n;i++) { 8970 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8971 } 8972 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8973 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8974 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8975 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8976 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8977 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8978 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8979 PetscInt *gidxs; 8980 8981 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8982 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8983 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8984 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8985 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8986 for (i=0;i<pcbddc->local_primal_size;i++) { 8987 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8988 } 8989 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8990 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8991 } 8992 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8993 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8994 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8995 } 8996 8997 /* get back data */ 8998 *coarse_size_n = coarse_size; 8999 *local_primal_indices_n = local_primal_indices; 9000 PetscFunctionReturn(0); 9001 } 9002 9003 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 9004 { 9005 IS localis_t; 9006 PetscInt i,lsize,*idxs,n; 9007 PetscScalar *vals; 9008 PetscErrorCode ierr; 9009 9010 PetscFunctionBegin; 9011 /* get indices in local ordering exploiting local to global map */ 9012 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 9013 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 9014 for (i=0;i<lsize;i++) vals[i] = 1.0; 9015 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 9016 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 9017 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 9018 if (idxs) { /* multilevel guard */ 9019 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 9020 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 9021 } 9022 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 9023 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 9024 ierr = PetscFree(vals);CHKERRQ(ierr); 9025 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 9026 /* now compute set in local ordering */ 9027 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9028 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9029 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 9030 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 9031 for (i=0,lsize=0;i<n;i++) { 9032 if (PetscRealPart(vals[i]) > 0.5) { 9033 lsize++; 9034 } 9035 } 9036 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 9037 for (i=0,lsize=0;i<n;i++) { 9038 if (PetscRealPart(vals[i]) > 0.5) { 9039 idxs[lsize++] = i; 9040 } 9041 } 9042 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 9043 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 9044 *localis = localis_t; 9045 PetscFunctionReturn(0); 9046 } 9047 9048 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9049 { 9050 PC_IS *pcis=(PC_IS*)pc->data; 9051 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9052 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 9053 Mat S_j; 9054 PetscInt *used_xadj,*used_adjncy; 9055 PetscBool free_used_adj; 9056 PetscErrorCode ierr; 9057 9058 PetscFunctionBegin; 9059 ierr = PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 9060 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9061 free_used_adj = PETSC_FALSE; 9062 if (pcbddc->sub_schurs_layers == -1) { 9063 used_xadj = NULL; 9064 used_adjncy = NULL; 9065 } else { 9066 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9067 used_xadj = pcbddc->mat_graph->xadj; 9068 used_adjncy = pcbddc->mat_graph->adjncy; 9069 } else if (pcbddc->computed_rowadj) { 9070 used_xadj = pcbddc->mat_graph->xadj; 9071 used_adjncy = pcbddc->mat_graph->adjncy; 9072 } else { 9073 PetscBool flg_row=PETSC_FALSE; 9074 const PetscInt *xadj,*adjncy; 9075 PetscInt nvtxs; 9076 9077 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 9078 if (flg_row) { 9079 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 9080 ierr = PetscArraycpy(used_xadj,xadj,nvtxs+1);CHKERRQ(ierr); 9081 ierr = PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs]);CHKERRQ(ierr); 9082 free_used_adj = PETSC_TRUE; 9083 } else { 9084 pcbddc->sub_schurs_layers = -1; 9085 used_xadj = NULL; 9086 used_adjncy = NULL; 9087 } 9088 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 9089 } 9090 } 9091 9092 /* setup sub_schurs data */ 9093 ierr = MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 9094 if (!sub_schurs->schur_explicit) { 9095 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9096 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 9097 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 9098 } else { 9099 Mat change = NULL; 9100 Vec scaling = NULL; 9101 IS change_primal = NULL, iP; 9102 PetscInt benign_n; 9103 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9104 PetscBool need_change = PETSC_FALSE; 9105 PetscBool discrete_harmonic = PETSC_FALSE; 9106 9107 if (!pcbddc->use_vertices && reuse_solvers) { 9108 PetscInt n_vertices; 9109 9110 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 9111 reuse_solvers = (PetscBool)!n_vertices; 9112 } 9113 if (!pcbddc->benign_change_explicit) { 9114 benign_n = pcbddc->benign_n; 9115 } else { 9116 benign_n = 0; 9117 } 9118 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9119 We need a global reduction to avoid possible deadlocks. 9120 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9121 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9122 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9123 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 9124 need_change = (PetscBool)(!need_change); 9125 } 9126 /* If the user defines additional constraints, we import them here. 9127 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 9128 if (need_change) { 9129 PC_IS *pcisf; 9130 PC_BDDC *pcbddcf; 9131 PC pcf; 9132 9133 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 9134 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 9135 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 9136 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 9137 9138 /* hacks */ 9139 pcisf = (PC_IS*)pcf->data; 9140 pcisf->is_B_local = pcis->is_B_local; 9141 pcisf->vec1_N = pcis->vec1_N; 9142 pcisf->BtoNmap = pcis->BtoNmap; 9143 pcisf->n = pcis->n; 9144 pcisf->n_B = pcis->n_B; 9145 pcbddcf = (PC_BDDC*)pcf->data; 9146 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 9147 pcbddcf->mat_graph = pcbddc->mat_graph; 9148 pcbddcf->use_faces = PETSC_TRUE; 9149 pcbddcf->use_change_of_basis = PETSC_TRUE; 9150 pcbddcf->use_change_on_faces = PETSC_TRUE; 9151 pcbddcf->use_qr_single = PETSC_TRUE; 9152 pcbddcf->fake_change = PETSC_TRUE; 9153 9154 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 9155 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 9156 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 9157 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 9158 change = pcbddcf->ConstraintMatrix; 9159 pcbddcf->ConstraintMatrix = NULL; 9160 9161 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 9162 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 9163 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 9164 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 9165 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 9166 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 9167 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 9168 pcf->ops->destroy = NULL; 9169 pcf->ops->reset = NULL; 9170 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 9171 } 9172 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9173 9174 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 9175 if (iP) { 9176 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 9177 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 9178 ierr = PetscOptionsEnd();CHKERRQ(ierr); 9179 } 9180 if (discrete_harmonic) { 9181 Mat A; 9182 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 9183 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 9184 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 9185 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 9186 ierr = MatDestroy(&A);CHKERRQ(ierr); 9187 } else { 9188 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 9189 } 9190 ierr = MatDestroy(&change);CHKERRQ(ierr); 9191 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 9192 } 9193 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 9194 9195 /* free adjacency */ 9196 if (free_used_adj) { 9197 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 9198 } 9199 ierr = PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 9200 PetscFunctionReturn(0); 9201 } 9202 9203 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9204 { 9205 PC_IS *pcis=(PC_IS*)pc->data; 9206 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9207 PCBDDCGraph graph; 9208 PetscErrorCode ierr; 9209 9210 PetscFunctionBegin; 9211 /* attach interface graph for determining subsets */ 9212 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9213 IS verticesIS,verticescomm; 9214 PetscInt vsize,*idxs; 9215 9216 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 9217 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 9218 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 9219 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 9220 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 9221 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 9222 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 9223 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 9224 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 9225 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 9226 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 9227 } else { 9228 graph = pcbddc->mat_graph; 9229 } 9230 /* print some info */ 9231 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9232 IS vertices; 9233 PetscInt nv,nedges,nfaces; 9234 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 9235 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 9236 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 9237 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9238 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 9239 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);CHKERRQ(ierr); 9240 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);CHKERRQ(ierr); 9241 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);CHKERRQ(ierr); 9242 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 9243 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 9244 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 9245 } 9246 9247 /* sub_schurs init */ 9248 if (!pcbddc->sub_schurs) { 9249 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 9250 } 9251 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 9252 9253 /* free graph struct */ 9254 if (pcbddc->sub_schurs_rebuild) { 9255 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 9256 } 9257 PetscFunctionReturn(0); 9258 } 9259 9260 PetscErrorCode PCBDDCCheckOperator(PC pc) 9261 { 9262 PC_IS *pcis=(PC_IS*)pc->data; 9263 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9264 PetscErrorCode ierr; 9265 9266 PetscFunctionBegin; 9267 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 9268 IS zerodiag = NULL; 9269 Mat S_j,B0_B=NULL; 9270 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 9271 PetscScalar *p0_check,*array,*array2; 9272 PetscReal norm; 9273 PetscInt i; 9274 9275 /* B0 and B0_B */ 9276 if (zerodiag) { 9277 IS dummy; 9278 9279 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 9280 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 9281 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 9282 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 9283 } 9284 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 9285 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 9286 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 9287 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9288 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9289 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9290 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9291 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 9292 /* S_j */ 9293 ierr = MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 9294 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 9295 9296 /* mimic vector in \widetilde{W}_\Gamma */ 9297 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 9298 /* continuous in primal space */ 9299 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 9300 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9301 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9302 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9303 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 9304 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 9305 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 9306 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9307 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 9308 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 9309 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9310 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9311 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 9312 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 9313 9314 /* assemble rhs for coarse problem */ 9315 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 9316 /* local with Schur */ 9317 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 9318 if (zerodiag) { 9319 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 9320 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 9321 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 9322 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 9323 } 9324 /* sum on primal nodes the local contributions */ 9325 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9326 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9327 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 9328 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 9329 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 9330 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 9331 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 9332 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 9333 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9334 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9335 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9336 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9337 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9338 /* scale primal nodes (BDDC sums contibutions) */ 9339 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 9340 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 9341 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9342 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 9343 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 9344 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9345 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9346 /* global: \widetilde{B0}_B w_\Gamma */ 9347 if (zerodiag) { 9348 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 9349 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 9350 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 9351 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 9352 } 9353 /* BDDC */ 9354 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 9355 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 9356 9357 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 9358 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 9359 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 9360 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);CHKERRQ(ierr); 9361 for (i=0;i<pcbddc->benign_n;i++) { 9362 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));CHKERRQ(ierr); 9363 } 9364 ierr = PetscFree(p0_check);CHKERRQ(ierr); 9365 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 9366 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 9367 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 9368 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 9369 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 9370 } 9371 PetscFunctionReturn(0); 9372 } 9373 9374 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9375 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9376 { 9377 Mat At; 9378 IS rows; 9379 PetscInt rst,ren; 9380 PetscErrorCode ierr; 9381 PetscLayout rmap; 9382 9383 PetscFunctionBegin; 9384 rst = ren = 0; 9385 if (ccomm != MPI_COMM_NULL) { 9386 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 9387 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 9388 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 9389 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 9390 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 9391 } 9392 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 9393 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 9394 ierr = ISDestroy(&rows);CHKERRQ(ierr); 9395 9396 if (ccomm != MPI_COMM_NULL) { 9397 Mat_MPIAIJ *a,*b; 9398 IS from,to; 9399 Vec gvec; 9400 PetscInt lsize; 9401 9402 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 9403 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 9404 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 9405 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 9406 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 9407 a = (Mat_MPIAIJ*)At->data; 9408 b = (Mat_MPIAIJ*)(*B)->data; 9409 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 9410 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 9411 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 9412 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 9413 b->A = a->A; 9414 b->B = a->B; 9415 9416 b->donotstash = a->donotstash; 9417 b->roworiented = a->roworiented; 9418 b->rowindices = NULL; 9419 b->rowvalues = NULL; 9420 b->getrowactive = PETSC_FALSE; 9421 9422 (*B)->rmap = rmap; 9423 (*B)->factortype = A->factortype; 9424 (*B)->assembled = PETSC_TRUE; 9425 (*B)->insertmode = NOT_SET_VALUES; 9426 (*B)->preallocated = PETSC_TRUE; 9427 9428 if (a->colmap) { 9429 #if defined(PETSC_USE_CTABLE) 9430 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 9431 #else 9432 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 9433 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9434 ierr = PetscArraycpy(b->colmap,a->colmap,At->cmap->N);CHKERRQ(ierr); 9435 #endif 9436 } else b->colmap = NULL; 9437 if (a->garray) { 9438 PetscInt len; 9439 len = a->B->cmap->n; 9440 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 9441 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 9442 if (len) { ierr = PetscArraycpy(b->garray,a->garray,len);CHKERRQ(ierr); } 9443 } else b->garray = NULL; 9444 9445 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 9446 b->lvec = a->lvec; 9447 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 9448 9449 /* cannot use VecScatterCopy */ 9450 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 9451 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 9452 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 9453 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 9454 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 9455 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 9456 ierr = ISDestroy(&from);CHKERRQ(ierr); 9457 ierr = ISDestroy(&to);CHKERRQ(ierr); 9458 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 9459 } 9460 ierr = MatDestroy(&At);CHKERRQ(ierr); 9461 PetscFunctionReturn(0); 9462 } 9463