1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 PetscScalar *uwork,*data,*U, ds = 0.; 18 PetscReal *sing; 19 PetscBLASInt bM,bN,lwork,lierr,di = 1; 20 PetscInt ulw,i,nr,nc,n; 21 #if defined(PETSC_USE_COMPLEX) 22 PetscReal *rwork2; 23 #endif 24 25 PetscFunctionBegin; 26 PetscCall(MatGetSize(A,&nr,&nc)); 27 if (!nr || !nc) PetscFunctionReturn(0); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 32 PetscCall(PetscMalloc1(ulw,&uwork)); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr,nc); 38 if (!rwork) { 39 PetscCall(PetscMalloc1(n,&sing)); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 PetscCall(PetscMalloc1(nr*nr,&U)); 46 PetscCall(PetscBLASIntCast(nr,&bM)); 47 PetscCall(PetscBLASIntCast(nc,&bN)); 48 PetscCall(PetscBLASIntCast(ulw,&lwork)); 49 PetscCall(MatDenseGetArray(A,&data)); 50 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 51 #if !defined(PETSC_USE_COMPLEX) 52 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 53 #else 54 PetscCall(PetscMalloc1(5*n,&rwork2)); 55 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,rwork2,&lierr)); 56 PetscCall(PetscFree(rwork2)); 57 #endif 58 PetscCall(PetscFPTrapPop()); 59 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 60 PetscCall(MatDenseRestoreArray(A,&data)); 61 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 62 if (!rwork) { 63 PetscCall(PetscFree(sing)); 64 } 65 if (!work) { 66 PetscCall(PetscFree(uwork)); 67 } 68 /* create B */ 69 if (!range) { 70 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B)); 71 PetscCall(MatDenseGetArray(*B,&data)); 72 PetscCall(PetscArraycpy(data,U+nr*i,(nr-i)*nr)); 73 } else { 74 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B)); 75 PetscCall(MatDenseGetArray(*B,&data)); 76 PetscCall(PetscArraycpy(data,U,i*nr)); 77 } 78 PetscCall(MatDenseRestoreArray(*B,&data)); 79 PetscCall(PetscFree(U)); 80 PetscFunctionReturn(0); 81 } 82 83 /* TODO REMOVE */ 84 #if defined(PRINT_GDET) 85 static int inc = 0; 86 static int lev = 0; 87 #endif 88 89 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 90 { 91 Mat GE,GEd; 92 PetscInt rsize,csize,esize; 93 PetscScalar *ptr; 94 95 PetscFunctionBegin; 96 PetscCall(ISGetSize(edge,&esize)); 97 if (!esize) PetscFunctionReturn(0); 98 PetscCall(ISGetSize(extrow,&rsize)); 99 PetscCall(ISGetSize(extcol,&csize)); 100 101 /* gradients */ 102 ptr = work + 5*esize; 103 PetscCall(MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE)); 104 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins)); 105 PetscCall(MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins)); 106 PetscCall(MatDestroy(&GE)); 107 108 /* constants */ 109 ptr += rsize*csize; 110 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd)); 111 PetscCall(MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE)); 112 PetscCall(MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd)); 113 PetscCall(MatDestroy(&GE)); 114 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins)); 115 PetscCall(MatDestroy(&GEd)); 116 117 if (corners) { 118 Mat GEc; 119 const PetscScalar *vals; 120 PetscScalar v; 121 122 PetscCall(MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc)); 123 PetscCall(MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd)); 124 PetscCall(MatDenseGetArrayRead(GEd,&vals)); 125 /* v = PetscAbsScalar(vals[0]) */; 126 v = 1.; 127 cvals[0] = vals[0]/v; 128 cvals[1] = vals[1]/v; 129 PetscCall(MatDenseRestoreArrayRead(GEd,&vals)); 130 PetscCall(MatScale(*GKins,1./v)); 131 #if defined(PRINT_GDET) 132 { 133 PetscViewer viewer; 134 char filename[256]; 135 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 136 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 137 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 138 PetscCall(PetscObjectSetName((PetscObject)GEc,"GEc")); 139 PetscCall(MatView(GEc,viewer)); 140 PetscCall(PetscObjectSetName((PetscObject)(*GKins),"GK")); 141 PetscCall(MatView(*GKins,viewer)); 142 PetscCall(PetscObjectSetName((PetscObject)GEd,"Gproj")); 143 PetscCall(MatView(GEd,viewer)); 144 PetscCall(PetscViewerDestroy(&viewer)); 145 } 146 #endif 147 PetscCall(MatDestroy(&GEd)); 148 PetscCall(MatDestroy(&GEc)); 149 } 150 151 PetscFunctionReturn(0); 152 } 153 154 PetscErrorCode PCBDDCNedelecSupport(PC pc) 155 { 156 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 157 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 158 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 159 Vec tvec; 160 PetscSF sfv; 161 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 162 MPI_Comm comm; 163 IS lned,primals,allprimals,nedfieldlocal; 164 IS *eedges,*extrows,*extcols,*alleedges; 165 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 166 PetscScalar *vals,*work; 167 PetscReal *rwork; 168 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 169 PetscInt ne,nv,Lv,order,n,field; 170 PetscInt n_neigh,*neigh,*n_shared,**shared; 171 PetscInt i,j,extmem,cum,maxsize,nee; 172 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 173 PetscInt *sfvleaves,*sfvroots; 174 PetscInt *corners,*cedges; 175 PetscInt *ecount,**eneighs,*vcount,**vneighs; 176 PetscInt *emarks; 177 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 178 PetscErrorCode ierr; 179 180 PetscFunctionBegin; 181 /* If the discrete gradient is defined for a subset of dofs and global is true, 182 it assumes G is given in global ordering for all the dofs. 183 Otherwise, the ordering is global for the Nedelec field */ 184 order = pcbddc->nedorder; 185 conforming = pcbddc->conforming; 186 field = pcbddc->nedfield; 187 global = pcbddc->nedglobal; 188 setprimal = PETSC_FALSE; 189 print = PETSC_FALSE; 190 singular = PETSC_FALSE; 191 192 /* Command line customization */ 193 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");PetscCall(ierr); 194 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL)); 195 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL)); 196 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL)); 197 /* print debug info TODO: to be removed */ 198 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL)); 199 ierr = PetscOptionsEnd();PetscCall(ierr); 200 201 /* Return if there are no edges in the decomposition and the problem is not singular */ 202 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&al2g,NULL)); 203 PetscCall(ISLocalToGlobalMappingGetSize(al2g,&n)); 204 PetscCall(PetscObjectGetComm((PetscObject)pc,&comm)); 205 if (!singular) { 206 PetscCall(VecGetArrayRead(matis->counter,(const PetscScalar**)&vals)); 207 lrc[0] = PETSC_FALSE; 208 for (i=0;i<n;i++) { 209 if (PetscRealPart(vals[i]) > 2.) { 210 lrc[0] = PETSC_TRUE; 211 break; 212 } 213 } 214 PetscCall(VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals)); 215 PetscCallMPI(MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm)); 216 if (!lrc[1]) PetscFunctionReturn(0); 217 } 218 219 /* Get Nedelec field */ 220 PetscCheckFalse(pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal,comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal); 221 if (pcbddc->n_ISForDofsLocal && field >= 0) { 222 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 223 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 224 PetscCall(ISGetLocalSize(nedfieldlocal,&ne)); 225 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 226 ne = n; 227 nedfieldlocal = NULL; 228 global = PETSC_TRUE; 229 } else if (field == PETSC_DECIDE) { 230 PetscInt rst,ren,*idx; 231 232 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 233 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 234 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren)); 235 for (i=rst;i<ren;i++) { 236 PetscInt nc; 237 238 PetscCall(MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL)); 239 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 240 PetscCall(MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL)); 241 } 242 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 243 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 244 PetscCall(PetscMalloc1(n,&idx)); 245 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 246 PetscCall(ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal)); 247 } else { 248 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 249 } 250 251 /* Sanity checks */ 252 PetscCheckFalse(!order && !conforming,comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 253 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix,comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 254 PetscCheckFalse(order && ne%order,PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order); 255 256 /* Just set primal dofs and return */ 257 if (setprimal) { 258 IS enedfieldlocal; 259 PetscInt *eidxs; 260 261 PetscCall(PetscMalloc1(ne,&eidxs)); 262 PetscCall(VecGetArrayRead(matis->counter,(const PetscScalar**)&vals)); 263 if (nedfieldlocal) { 264 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 265 for (i=0,cum=0;i<ne;i++) { 266 if (PetscRealPart(vals[idxs[i]]) > 2.) { 267 eidxs[cum++] = idxs[i]; 268 } 269 } 270 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 271 } else { 272 for (i=0,cum=0;i<ne;i++) { 273 if (PetscRealPart(vals[i]) > 2.) { 274 eidxs[cum++] = i; 275 } 276 } 277 } 278 PetscCall(VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals)); 279 PetscCall(ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal)); 280 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal)); 281 PetscCall(PetscFree(eidxs)); 282 PetscCall(ISDestroy(&nedfieldlocal)); 283 PetscCall(ISDestroy(&enedfieldlocal)); 284 PetscFunctionReturn(0); 285 } 286 287 /* Compute some l2g maps */ 288 if (nedfieldlocal) { 289 IS is; 290 291 /* need to map from the local Nedelec field to local numbering */ 292 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g)); 293 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 294 PetscCall(ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is)); 295 PetscCall(ISLocalToGlobalMappingCreateIS(is,&al2g)); 296 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 297 if (global) { 298 PetscCall(PetscObjectReference((PetscObject)al2g)); 299 el2g = al2g; 300 } else { 301 IS gis; 302 303 PetscCall(ISRenumber(is,NULL,NULL,&gis)); 304 PetscCall(ISLocalToGlobalMappingCreateIS(gis,&el2g)); 305 PetscCall(ISDestroy(&gis)); 306 } 307 PetscCall(ISDestroy(&is)); 308 } else { 309 /* restore default */ 310 pcbddc->nedfield = -1; 311 /* one ref for the destruction of al2g, one for el2g */ 312 PetscCall(PetscObjectReference((PetscObject)al2g)); 313 PetscCall(PetscObjectReference((PetscObject)al2g)); 314 el2g = al2g; 315 fl2g = NULL; 316 } 317 318 /* Start communication to drop connections for interior edges (for cc analysis only) */ 319 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 320 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 321 if (nedfieldlocal) { 322 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 323 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 324 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 325 } else { 326 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 327 } 328 PetscCall(PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM)); 329 PetscCall(PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM)); 330 331 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 332 PetscCall(MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G)); 333 PetscCall(MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 334 if (global) { 335 PetscInt rst; 336 337 PetscCall(MatGetOwnershipRange(G,&rst,NULL)); 338 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 339 if (matis->sf_rootdata[i] < 2) { 340 matis->sf_rootdata[cum++] = i + rst; 341 } 342 } 343 PetscCall(MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE)); 344 PetscCall(MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL)); 345 } else { 346 PetscInt *tbz; 347 348 PetscCall(PetscMalloc1(ne,&tbz)); 349 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 350 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 351 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 352 for (i=0,cum=0;i<ne;i++) 353 if (matis->sf_leafdata[idxs[i]] == 1) 354 tbz[cum++] = i; 355 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 356 PetscCall(ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz)); 357 PetscCall(MatZeroRows(G,cum,tbz,0.,NULL,NULL)); 358 PetscCall(PetscFree(tbz)); 359 } 360 } else { /* we need the entire G to infer the nullspace */ 361 PetscCall(PetscObjectReference((PetscObject)pcbddc->discretegradient)); 362 G = pcbddc->discretegradient; 363 } 364 365 /* Extract subdomain relevant rows of G */ 366 PetscCall(ISLocalToGlobalMappingGetIndices(el2g,&idxs)); 367 PetscCall(ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned)); 368 PetscCall(MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall)); 369 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g,&idxs)); 370 PetscCall(ISDestroy(&lned)); 371 PetscCall(MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis)); 372 PetscCall(MatDestroy(&lGall)); 373 PetscCall(MatISGetLocalMat(lGis,&lG)); 374 375 /* SF for nodal dofs communications */ 376 PetscCall(MatGetLocalSize(G,NULL,&Lv)); 377 PetscCall(MatISGetLocalToGlobalMapping(lGis,NULL,&vl2g)); 378 PetscCall(PetscObjectReference((PetscObject)vl2g)); 379 PetscCall(ISLocalToGlobalMappingGetSize(vl2g,&nv)); 380 PetscCall(PetscSFCreate(comm,&sfv)); 381 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g,&idxs)); 382 PetscCall(PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs)); 383 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs)); 384 i = singular ? 2 : 1; 385 PetscCall(PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots)); 386 387 /* Destroy temporary G created in MATIS format and modified G */ 388 PetscCall(PetscObjectReference((PetscObject)lG)); 389 PetscCall(MatDestroy(&lGis)); 390 PetscCall(MatDestroy(&G)); 391 392 if (print) { 393 PetscCall(PetscObjectSetName((PetscObject)lG,"initial_lG")); 394 PetscCall(MatView(lG,NULL)); 395 } 396 397 /* Save lG for values insertion in change of basis */ 398 PetscCall(MatDuplicate(lG,MAT_COPY_VALUES,&lGinit)); 399 400 /* Analyze the edge-nodes connections (duplicate lG) */ 401 PetscCall(MatDuplicate(lG,MAT_COPY_VALUES,&lGe)); 402 PetscCall(MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 403 PetscCall(PetscBTCreate(nv,&btv)); 404 PetscCall(PetscBTCreate(ne,&bte)); 405 PetscCall(PetscBTCreate(ne,&btb)); 406 PetscCall(PetscBTCreate(ne,&btbd)); 407 PetscCall(PetscBTCreate(nv,&btvcand)); 408 /* need to import the boundary specification to ensure the 409 proper detection of coarse edges' endpoints */ 410 if (pcbddc->DirichletBoundariesLocal) { 411 IS is; 412 413 if (fl2g) { 414 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is)); 415 } else { 416 is = pcbddc->DirichletBoundariesLocal; 417 } 418 PetscCall(ISGetLocalSize(is,&cum)); 419 PetscCall(ISGetIndices(is,&idxs)); 420 for (i=0;i<cum;i++) { 421 if (idxs[i] >= 0) { 422 PetscCall(PetscBTSet(btb,idxs[i])); 423 PetscCall(PetscBTSet(btbd,idxs[i])); 424 } 425 } 426 PetscCall(ISRestoreIndices(is,&idxs)); 427 if (fl2g) { 428 PetscCall(ISDestroy(&is)); 429 } 430 } 431 if (pcbddc->NeumannBoundariesLocal) { 432 IS is; 433 434 if (fl2g) { 435 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is)); 436 } else { 437 is = pcbddc->NeumannBoundariesLocal; 438 } 439 PetscCall(ISGetLocalSize(is,&cum)); 440 PetscCall(ISGetIndices(is,&idxs)); 441 for (i=0;i<cum;i++) { 442 if (idxs[i] >= 0) { 443 PetscCall(PetscBTSet(btb,idxs[i])); 444 } 445 } 446 PetscCall(ISRestoreIndices(is,&idxs)); 447 if (fl2g) { 448 PetscCall(ISDestroy(&is)); 449 } 450 } 451 452 /* Count neighs per dof */ 453 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs)); 454 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs)); 455 456 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 457 for proper detection of coarse edges' endpoints */ 458 PetscCall(PetscBTCreate(ne,&btee)); 459 for (i=0;i<ne;i++) { 460 if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) { 461 PetscCall(PetscBTSet(btee,i)); 462 } 463 } 464 PetscCall(PetscMalloc1(ne,&marks)); 465 if (!conforming) { 466 PetscCall(MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt)); 467 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 468 } 469 PetscCall(MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 470 PetscCall(MatSeqAIJGetArray(lGe,&vals)); 471 cum = 0; 472 for (i=0;i<ne;i++) { 473 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 474 if (!PetscBTLookup(btee,i)) { 475 marks[cum++] = i; 476 continue; 477 } 478 /* set badly connected edge dofs as primal */ 479 if (!conforming) { 480 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 481 marks[cum++] = i; 482 PetscCall(PetscBTSet(bte,i)); 483 for (j=ii[i];j<ii[i+1];j++) { 484 PetscCall(PetscBTSet(btv,jj[j])); 485 } 486 } else { 487 /* every edge dofs should be connected trough a certain number of nodal dofs 488 to other edge dofs belonging to coarse edges 489 - at most 2 endpoints 490 - order-1 interior nodal dofs 491 - no undefined nodal dofs (nconn < order) 492 */ 493 PetscInt ends = 0,ints = 0, undef = 0; 494 for (j=ii[i];j<ii[i+1];j++) { 495 PetscInt v = jj[j],k; 496 PetscInt nconn = iit[v+1]-iit[v]; 497 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 498 if (nconn > order) ends++; 499 else if (nconn == order) ints++; 500 else undef++; 501 } 502 if (undef || ends > 2 || ints != order -1) { 503 marks[cum++] = i; 504 PetscCall(PetscBTSet(bte,i)); 505 for (j=ii[i];j<ii[i+1];j++) { 506 PetscCall(PetscBTSet(btv,jj[j])); 507 } 508 } 509 } 510 } 511 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 512 if (!order && ii[i+1] != ii[i]) { 513 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 514 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 515 } 516 } 517 PetscCall(PetscBTDestroy(&btee)); 518 PetscCall(MatSeqAIJRestoreArray(lGe,&vals)); 519 PetscCall(MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 520 if (!conforming) { 521 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 522 PetscCall(MatDestroy(&lGt)); 523 } 524 PetscCall(MatZeroRows(lGe,cum,marks,0.,NULL,NULL)); 525 526 /* identify splitpoints and corner candidates */ 527 PetscCall(MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt)); 528 if (print) { 529 PetscCall(PetscObjectSetName((PetscObject)lGe,"edgerestr_lG")); 530 PetscCall(MatView(lGe,NULL)); 531 PetscCall(PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt")); 532 PetscCall(MatView(lGt,NULL)); 533 } 534 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 535 PetscCall(MatSeqAIJGetArray(lGt,&vals)); 536 for (i=0;i<nv;i++) { 537 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 538 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 539 if (!order) { /* variable order */ 540 PetscReal vorder = 0.; 541 542 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 543 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 544 PetscCheckFalse(vorder-test > PETSC_SQRT_MACHINE_EPSILON,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test); 545 ord = 1; 546 } 547 PetscAssert(test%ord == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT,test,i,ord); 548 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 549 if (PetscBTLookup(btbd,jj[j])) { 550 bdir = PETSC_TRUE; 551 break; 552 } 553 if (vc != ecount[jj[j]]) { 554 sneighs = PETSC_FALSE; 555 } else { 556 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 557 for (k=0;k<vc;k++) { 558 if (vn[k] != en[k]) { 559 sneighs = PETSC_FALSE; 560 break; 561 } 562 } 563 } 564 } 565 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 566 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir); 567 PetscCall(PetscBTSet(btv,i)); 568 } else if (test == ord) { 569 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 570 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i); 571 PetscCall(PetscBTSet(btv,i)); 572 } else { 573 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i); 574 PetscCall(PetscBTSet(btvcand,i)); 575 } 576 } 577 } 578 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs)); 579 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs)); 580 PetscCall(PetscBTDestroy(&btbd)); 581 582 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 583 if (order != 1) { 584 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 585 PetscCall(MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 586 for (i=0;i<nv;i++) { 587 if (PetscBTLookup(btvcand,i)) { 588 PetscBool found = PETSC_FALSE; 589 for (j=ii[i];j<ii[i+1] && !found;j++) { 590 PetscInt k,e = jj[j]; 591 if (PetscBTLookup(bte,e)) continue; 592 for (k=iit[e];k<iit[e+1];k++) { 593 PetscInt v = jjt[k]; 594 if (v != i && PetscBTLookup(btvcand,v)) { 595 found = PETSC_TRUE; 596 break; 597 } 598 } 599 } 600 if (!found) { 601 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i); 602 PetscCall(PetscBTClear(btvcand,i)); 603 } else { 604 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i); 605 } 606 } 607 } 608 PetscCall(MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 609 } 610 PetscCall(MatSeqAIJRestoreArray(lGt,&vals)); 611 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 612 PetscCall(MatDestroy(&lGe)); 613 614 /* Get the local G^T explicitly */ 615 PetscCall(MatDestroy(&lGt)); 616 PetscCall(MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt)); 617 PetscCall(MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 618 619 /* Mark interior nodal dofs */ 620 PetscCall(ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared)); 621 PetscCall(PetscBTCreate(nv,&btvi)); 622 for (i=1;i<n_neigh;i++) { 623 for (j=0;j<n_shared[i];j++) { 624 PetscCall(PetscBTSet(btvi,shared[i][j])); 625 } 626 } 627 PetscCall(ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared)); 628 629 /* communicate corners and splitpoints */ 630 PetscCall(PetscMalloc1(nv,&vmarks)); 631 PetscCall(PetscArrayzero(sfvleaves,nv)); 632 PetscCall(PetscArrayzero(sfvroots,Lv)); 633 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 634 635 if (print) { 636 IS tbz; 637 638 cum = 0; 639 for (i=0;i<nv;i++) 640 if (sfvleaves[i]) 641 vmarks[cum++] = i; 642 643 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz)); 644 PetscCall(PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local")); 645 PetscCall(ISView(tbz,NULL)); 646 PetscCall(ISDestroy(&tbz)); 647 } 648 649 PetscCall(PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM)); 650 PetscCall(PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM)); 651 PetscCall(PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE)); 652 PetscCall(PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE)); 653 654 /* Zero rows of lGt corresponding to identified corners 655 and interior nodal dofs */ 656 cum = 0; 657 for (i=0;i<nv;i++) { 658 if (sfvleaves[i]) { 659 vmarks[cum++] = i; 660 PetscCall(PetscBTSet(btv,i)); 661 } 662 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 663 } 664 PetscCall(PetscBTDestroy(&btvi)); 665 if (print) { 666 IS tbz; 667 668 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz)); 669 PetscCall(PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior")); 670 PetscCall(ISView(tbz,NULL)); 671 PetscCall(ISDestroy(&tbz)); 672 } 673 PetscCall(MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL)); 674 PetscCall(PetscFree(vmarks)); 675 PetscCall(PetscSFDestroy(&sfv)); 676 PetscCall(PetscFree2(sfvleaves,sfvroots)); 677 678 /* Recompute G */ 679 PetscCall(MatDestroy(&lG)); 680 PetscCall(MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG)); 681 if (print) { 682 PetscCall(PetscObjectSetName((PetscObject)lG,"used_lG")); 683 PetscCall(MatView(lG,NULL)); 684 PetscCall(PetscObjectSetName((PetscObject)lGt,"used_lGt")); 685 PetscCall(MatView(lGt,NULL)); 686 } 687 688 /* Get primal dofs (if any) */ 689 cum = 0; 690 for (i=0;i<ne;i++) { 691 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 692 } 693 if (fl2g) { 694 PetscCall(ISLocalToGlobalMappingApply(fl2g,cum,marks,marks)); 695 } 696 PetscCall(ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals)); 697 if (print) { 698 PetscCall(PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs")); 699 PetscCall(ISView(primals,NULL)); 700 } 701 PetscCall(PetscBTDestroy(&bte)); 702 /* TODO: what if the user passed in some of them ? */ 703 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primals)); 704 PetscCall(ISDestroy(&primals)); 705 706 /* Compute edge connectivity */ 707 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_")); 708 709 /* Symbolic conn = lG*lGt */ 710 PetscCall(MatProductCreate(lG,lGt,NULL,&conn)); 711 PetscCall(MatProductSetType(conn,MATPRODUCT_AB)); 712 PetscCall(MatProductSetAlgorithm(conn,"default")); 713 PetscCall(MatProductSetFill(conn,PETSC_DEFAULT)); 714 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_")); 715 PetscCall(MatProductSetFromOptions(conn)); 716 PetscCall(MatProductSymbolic(conn)); 717 718 PetscCall(MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 719 if (fl2g) { 720 PetscBT btf; 721 PetscInt *iia,*jja,*iiu,*jju; 722 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 723 724 /* create CSR for all local dofs */ 725 PetscCall(PetscMalloc1(n+1,&iia)); 726 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 727 PetscCheckFalse(pcbddc->mat_graph->nvtxs_csr != n,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n); 728 iiu = pcbddc->mat_graph->xadj; 729 jju = pcbddc->mat_graph->adjncy; 730 } else if (pcbddc->use_local_adj) { 731 rest = PETSC_TRUE; 732 PetscCall(MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done)); 733 } else { 734 free = PETSC_TRUE; 735 PetscCall(PetscMalloc2(n+1,&iiu,n,&jju)); 736 iiu[0] = 0; 737 for (i=0;i<n;i++) { 738 iiu[i+1] = i+1; 739 jju[i] = -1; 740 } 741 } 742 743 /* import sizes of CSR */ 744 iia[0] = 0; 745 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 746 747 /* overwrite entries corresponding to the Nedelec field */ 748 PetscCall(PetscBTCreate(n,&btf)); 749 PetscCall(ISGetIndices(nedfieldlocal,&idxs)); 750 for (i=0;i<ne;i++) { 751 PetscCall(PetscBTSet(btf,idxs[i])); 752 iia[idxs[i]+1] = ii[i+1]-ii[i]; 753 } 754 755 /* iia in CSR */ 756 for (i=0;i<n;i++) iia[i+1] += iia[i]; 757 758 /* jja in CSR */ 759 PetscCall(PetscMalloc1(iia[n],&jja)); 760 for (i=0;i<n;i++) 761 if (!PetscBTLookup(btf,i)) 762 for (j=0;j<iiu[i+1]-iiu[i];j++) 763 jja[iia[i]+j] = jju[iiu[i]+j]; 764 765 /* map edge dofs connectivity */ 766 if (jj) { 767 PetscCall(ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj)); 768 for (i=0;i<ne;i++) { 769 PetscInt e = idxs[i]; 770 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 771 } 772 } 773 PetscCall(ISRestoreIndices(nedfieldlocal,&idxs)); 774 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER)); 775 if (rest) { 776 PetscCall(MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done)); 777 } 778 if (free) { 779 PetscCall(PetscFree2(iiu,jju)); 780 } 781 PetscCall(PetscBTDestroy(&btf)); 782 } else { 783 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER)); 784 } 785 786 /* Analyze interface for edge dofs */ 787 PetscCall(PCBDDCAnalyzeInterface(pc)); 788 pcbddc->mat_graph->twodim = PETSC_FALSE; 789 790 /* Get coarse edges in the edge space */ 791 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 792 PetscCall(MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 793 794 if (fl2g) { 795 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals)); 796 PetscCall(PetscMalloc1(nee,&eedges)); 797 for (i=0;i<nee;i++) { 798 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i])); 799 } 800 } else { 801 eedges = alleedges; 802 primals = allprimals; 803 } 804 805 /* Mark fine edge dofs with their coarse edge id */ 806 PetscCall(PetscArrayzero(marks,ne)); 807 PetscCall(ISGetLocalSize(primals,&cum)); 808 PetscCall(ISGetIndices(primals,&idxs)); 809 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 810 PetscCall(ISRestoreIndices(primals,&idxs)); 811 if (print) { 812 PetscCall(PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs")); 813 PetscCall(ISView(primals,NULL)); 814 } 815 816 maxsize = 0; 817 for (i=0;i<nee;i++) { 818 PetscInt size,mark = i+1; 819 820 PetscCall(ISGetLocalSize(eedges[i],&size)); 821 PetscCall(ISGetIndices(eedges[i],&idxs)); 822 for (j=0;j<size;j++) marks[idxs[j]] = mark; 823 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 824 maxsize = PetscMax(maxsize,size); 825 } 826 827 /* Find coarse edge endpoints */ 828 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 829 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 830 for (i=0;i<nee;i++) { 831 PetscInt mark = i+1,size; 832 833 PetscCall(ISGetLocalSize(eedges[i],&size)); 834 if (!size && nedfieldlocal) continue; 835 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 836 PetscCall(ISGetIndices(eedges[i],&idxs)); 837 if (print) { 838 PetscCall(PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i)); 839 PetscCall(ISView(eedges[i],NULL)); 840 } 841 for (j=0;j<size;j++) { 842 PetscInt k, ee = idxs[j]; 843 if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee); 844 for (k=ii[ee];k<ii[ee+1];k++) { 845 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]); 846 if (PetscBTLookup(btv,jj[k])) { 847 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]); 848 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 849 PetscInt k2; 850 PetscBool corner = PETSC_FALSE; 851 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 852 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 853 /* it's a corner if either is connected with an edge dof belonging to a different cc or 854 if the edge dof lie on the natural part of the boundary */ 855 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 856 corner = PETSC_TRUE; 857 break; 858 } 859 } 860 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 861 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]); 862 PetscCall(PetscBTSet(btv,jj[k])); 863 } else { 864 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 865 } 866 } 867 } 868 } 869 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 870 } 871 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 872 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 873 PetscCall(PetscBTDestroy(&btb)); 874 875 /* Reset marked primal dofs */ 876 PetscCall(ISGetLocalSize(primals,&cum)); 877 PetscCall(ISGetIndices(primals,&idxs)); 878 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 879 PetscCall(ISRestoreIndices(primals,&idxs)); 880 881 /* Now use the initial lG */ 882 PetscCall(MatDestroy(&lG)); 883 PetscCall(MatDestroy(&lGt)); 884 lG = lGinit; 885 PetscCall(MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt)); 886 887 /* Compute extended cols indices */ 888 PetscCall(PetscBTCreate(nv,&btvc)); 889 PetscCall(PetscBTCreate(nee,&bter)); 890 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 891 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG,&i)); 892 i *= maxsize; 893 PetscCall(PetscCalloc1(nee,&extcols)); 894 PetscCall(PetscMalloc2(i,&extrow,i,&gidxs)); 895 eerr = PETSC_FALSE; 896 for (i=0;i<nee;i++) { 897 PetscInt size,found = 0; 898 899 cum = 0; 900 PetscCall(ISGetLocalSize(eedges[i],&size)); 901 if (!size && nedfieldlocal) continue; 902 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 903 PetscCall(ISGetIndices(eedges[i],&idxs)); 904 PetscCall(PetscBTMemzero(nv,btvc)); 905 for (j=0;j<size;j++) { 906 PetscInt k,ee = idxs[j]; 907 for (k=ii[ee];k<ii[ee+1];k++) { 908 PetscInt vv = jj[k]; 909 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 910 else if (!PetscBTLookupSet(btvc,vv)) found++; 911 } 912 } 913 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 914 PetscCall(PetscSortRemoveDupsInt(&cum,extrow)); 915 PetscCall(ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs)); 916 PetscCall(PetscSortIntWithArray(cum,gidxs,extrow)); 917 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i])); 918 /* it may happen that endpoints are not defined at this point 919 if it is the case, mark this edge for a second pass */ 920 if (cum != size -1 || found != 2) { 921 PetscCall(PetscBTSet(bter,i)); 922 if (print) { 923 PetscCall(PetscObjectSetName((PetscObject)eedges[i],"error_edge")); 924 PetscCall(ISView(eedges[i],NULL)); 925 PetscCall(PetscObjectSetName((PetscObject)extcols[i],"error_extcol")); 926 PetscCall(ISView(extcols[i],NULL)); 927 } 928 eerr = PETSC_TRUE; 929 } 930 } 931 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 932 PetscCallMPI(MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm)); 933 if (done) { 934 PetscInt *newprimals; 935 936 PetscCall(PetscMalloc1(ne,&newprimals)); 937 PetscCall(ISGetLocalSize(primals,&cum)); 938 PetscCall(ISGetIndices(primals,&idxs)); 939 PetscCall(PetscArraycpy(newprimals,idxs,cum)); 940 PetscCall(ISRestoreIndices(primals,&idxs)); 941 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 942 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr); 943 for (i=0;i<nee;i++) { 944 PetscBool has_candidates = PETSC_FALSE; 945 if (PetscBTLookup(bter,i)) { 946 PetscInt size,mark = i+1; 947 948 PetscCall(ISGetLocalSize(eedges[i],&size)); 949 PetscCall(ISGetIndices(eedges[i],&idxs)); 950 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 951 for (j=0;j<size;j++) { 952 PetscInt k,ee = idxs[j]; 953 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]); 954 for (k=ii[ee];k<ii[ee+1];k++) { 955 /* set all candidates located on the edge as corners */ 956 if (PetscBTLookup(btvcand,jj[k])) { 957 PetscInt k2,vv = jj[k]; 958 has_candidates = PETSC_TRUE; 959 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv); 960 PetscCall(PetscBTSet(btv,vv)); 961 /* set all edge dofs connected to candidate as primals */ 962 for (k2=iit[vv];k2<iit[vv+1];k2++) { 963 if (marks[jjt[k2]] == mark) { 964 PetscInt k3,ee2 = jjt[k2]; 965 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2); 966 newprimals[cum++] = ee2; 967 /* finally set the new corners */ 968 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 969 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]); 970 PetscCall(PetscBTSet(btv,jj[k3])); 971 } 972 } 973 } 974 } else { 975 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]); 976 } 977 } 978 } 979 if (!has_candidates) { /* circular edge */ 980 PetscInt k, ee = idxs[0],*tmarks; 981 982 PetscCall(PetscCalloc1(ne,&tmarks)); 983 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i); 984 for (k=ii[ee];k<ii[ee+1];k++) { 985 PetscInt k2; 986 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]); 987 PetscCall(PetscBTSet(btv,jj[k])); 988 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 989 } 990 for (j=0;j<size;j++) { 991 if (tmarks[idxs[j]] > 1) { 992 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]); 993 newprimals[cum++] = idxs[j]; 994 } 995 } 996 PetscCall(PetscFree(tmarks)); 997 } 998 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 999 } 1000 PetscCall(ISDestroy(&extcols[i])); 1001 } 1002 PetscCall(PetscFree(extcols)); 1003 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done)); 1004 PetscCall(PetscSortRemoveDupsInt(&cum,newprimals)); 1005 if (fl2g) { 1006 PetscCall(ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals)); 1007 PetscCall(ISDestroy(&primals)); 1008 for (i=0;i<nee;i++) { 1009 PetscCall(ISDestroy(&eedges[i])); 1010 } 1011 PetscCall(PetscFree(eedges)); 1012 } 1013 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1014 PetscCall(ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals)); 1015 PetscCall(PetscFree(newprimals)); 1016 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primals)); 1017 PetscCall(ISDestroy(&primals)); 1018 PetscCall(PCBDDCAnalyzeInterface(pc)); 1019 pcbddc->mat_graph->twodim = PETSC_FALSE; 1020 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1021 if (fl2g) { 1022 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals)); 1023 PetscCall(PetscMalloc1(nee,&eedges)); 1024 for (i=0;i<nee;i++) { 1025 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i])); 1026 } 1027 } else { 1028 eedges = alleedges; 1029 primals = allprimals; 1030 } 1031 PetscCall(PetscCalloc1(nee,&extcols)); 1032 1033 /* Mark again */ 1034 PetscCall(PetscArrayzero(marks,ne)); 1035 for (i=0;i<nee;i++) { 1036 PetscInt size,mark = i+1; 1037 1038 PetscCall(ISGetLocalSize(eedges[i],&size)); 1039 PetscCall(ISGetIndices(eedges[i],&idxs)); 1040 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1041 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1042 } 1043 if (print) { 1044 PetscCall(PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass")); 1045 PetscCall(ISView(primals,NULL)); 1046 } 1047 1048 /* Recompute extended cols */ 1049 eerr = PETSC_FALSE; 1050 for (i=0;i<nee;i++) { 1051 PetscInt size; 1052 1053 cum = 0; 1054 PetscCall(ISGetLocalSize(eedges[i],&size)); 1055 if (!size && nedfieldlocal) continue; 1056 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1057 PetscCall(ISGetIndices(eedges[i],&idxs)); 1058 for (j=0;j<size;j++) { 1059 PetscInt k,ee = idxs[j]; 1060 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1061 } 1062 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1063 PetscCall(PetscSortRemoveDupsInt(&cum,extrow)); 1064 PetscCall(ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs)); 1065 PetscCall(PetscSortIntWithArray(cum,gidxs,extrow)); 1066 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i])); 1067 if (cum != size -1) { 1068 if (print) { 1069 PetscCall(PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass")); 1070 PetscCall(ISView(eedges[i],NULL)); 1071 PetscCall(PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass")); 1072 PetscCall(ISView(extcols[i],NULL)); 1073 } 1074 eerr = PETSC_TRUE; 1075 } 1076 } 1077 } 1078 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1079 PetscCall(PetscFree2(extrow,gidxs)); 1080 PetscCall(PetscBTDestroy(&bter)); 1081 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF)); 1082 /* an error should not occur at this point */ 1083 PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1084 1085 /* Check the number of endpoints */ 1086 PetscCall(MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1087 PetscCall(PetscMalloc1(2*nee,&corners)); 1088 PetscCall(PetscMalloc1(nee,&cedges)); 1089 for (i=0;i<nee;i++) { 1090 PetscInt size, found = 0, gc[2]; 1091 1092 /* init with defaults */ 1093 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1094 PetscCall(ISGetLocalSize(eedges[i],&size)); 1095 if (!size && nedfieldlocal) continue; 1096 PetscCheck(size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1097 PetscCall(ISGetIndices(eedges[i],&idxs)); 1098 PetscCall(PetscBTMemzero(nv,btvc)); 1099 for (j=0;j<size;j++) { 1100 PetscInt k,ee = idxs[j]; 1101 for (k=ii[ee];k<ii[ee+1];k++) { 1102 PetscInt vv = jj[k]; 1103 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1104 PetscCheckFalse(found == 2,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i); 1105 corners[i*2+found++] = vv; 1106 } 1107 } 1108 } 1109 if (found != 2) { 1110 PetscInt e; 1111 if (fl2g) { 1112 PetscCall(ISLocalToGlobalMappingApply(fl2g,1,idxs,&e)); 1113 } else { 1114 e = idxs[0]; 1115 } 1116 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]); 1117 } 1118 1119 /* get primal dof index on this coarse edge */ 1120 PetscCall(ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc)); 1121 if (gc[0] > gc[1]) { 1122 PetscInt swap = corners[2*i]; 1123 corners[2*i] = corners[2*i+1]; 1124 corners[2*i+1] = swap; 1125 } 1126 cedges[i] = idxs[size-1]; 1127 PetscCall(ISRestoreIndices(eedges[i],&idxs)); 1128 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1129 } 1130 PetscCall(MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1131 PetscCall(PetscBTDestroy(&btvc)); 1132 1133 if (PetscDefined(USE_DEBUG)) { 1134 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1135 not interfere with neighbouring coarse edges */ 1136 PetscCall(PetscMalloc1(nee+1,&emarks)); 1137 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1138 for (i=0;i<nv;i++) { 1139 PetscInt emax = 0,eemax = 0; 1140 1141 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1142 PetscCall(PetscArrayzero(emarks,nee+1)); 1143 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1144 for (j=1;j<nee+1;j++) { 1145 if (emax < emarks[j]) { 1146 emax = emarks[j]; 1147 eemax = j; 1148 } 1149 } 1150 /* not relevant for edges */ 1151 if (!eemax) continue; 1152 1153 for (j=ii[i];j<ii[i+1];j++) { 1154 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1155 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]); 1156 } 1157 } 1158 } 1159 PetscCall(PetscFree(emarks)); 1160 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1161 } 1162 1163 /* Compute extended rows indices for edge blocks of the change of basis */ 1164 PetscCall(MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1165 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt,&extmem)); 1166 extmem *= maxsize; 1167 PetscCall(PetscMalloc1(extmem*nee,&extrow)); 1168 PetscCall(PetscMalloc1(nee,&extrows)); 1169 PetscCall(PetscCalloc1(nee,&extrowcum)); 1170 for (i=0;i<nv;i++) { 1171 PetscInt mark = 0,size,start; 1172 1173 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1174 for (j=ii[i];j<ii[i+1];j++) 1175 if (marks[jj[j]] && !mark) 1176 mark = marks[jj[j]]; 1177 1178 /* not relevant */ 1179 if (!mark) continue; 1180 1181 /* import extended row */ 1182 mark--; 1183 start = mark*extmem+extrowcum[mark]; 1184 size = ii[i+1]-ii[i]; 1185 PetscCheckFalse(extrowcum[mark] + size > extmem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem); 1186 PetscCall(PetscArraycpy(extrow+start,jj+ii[i],size)); 1187 extrowcum[mark] += size; 1188 } 1189 PetscCall(MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done)); 1190 PetscCall(MatDestroy(&lGt)); 1191 PetscCall(PetscFree(marks)); 1192 1193 /* Compress extrows */ 1194 cum = 0; 1195 for (i=0;i<nee;i++) { 1196 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1197 PetscCall(PetscSortRemoveDupsInt(&size,start)); 1198 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i])); 1199 cum = PetscMax(cum,size); 1200 } 1201 PetscCall(PetscFree(extrowcum)); 1202 PetscCall(PetscBTDestroy(&btv)); 1203 PetscCall(PetscBTDestroy(&btvcand)); 1204 1205 /* Workspace for lapack inner calls and VecSetValues */ 1206 PetscCall(PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork)); 1207 1208 /* Create change of basis matrix (preallocation can be improved) */ 1209 PetscCall(MatCreate(comm,&T)); 1210 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1211 pc->pmat->rmap->N,pc->pmat->rmap->N);PetscCall(ierr); 1212 PetscCall(MatSetType(T,MATAIJ)); 1213 PetscCall(MatSeqAIJSetPreallocation(T,10,NULL)); 1214 PetscCall(MatMPIAIJSetPreallocation(T,10,NULL,10,NULL)); 1215 PetscCall(MatSetLocalToGlobalMapping(T,al2g,al2g)); 1216 PetscCall(MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 1217 PetscCall(MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE)); 1218 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1219 1220 /* Defaults to identity */ 1221 PetscCall(MatCreateVecs(pc->pmat,&tvec,NULL)); 1222 PetscCall(VecSet(tvec,1.0)); 1223 PetscCall(MatDiagonalSet(T,tvec,INSERT_VALUES)); 1224 PetscCall(VecDestroy(&tvec)); 1225 1226 /* Create discrete gradient for the coarser level if needed */ 1227 PetscCall(MatDestroy(&pcbddc->nedcG)); 1228 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1229 if (pcbddc->current_level < pcbddc->max_levels) { 1230 ISLocalToGlobalMapping cel2g,cvl2g; 1231 IS wis,gwis; 1232 PetscInt cnv,cne; 1233 1234 PetscCall(ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis)); 1235 if (fl2g) { 1236 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal)); 1237 } else { 1238 PetscCall(PetscObjectReference((PetscObject)wis)); 1239 pcbddc->nedclocal = wis; 1240 } 1241 PetscCall(ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis)); 1242 PetscCall(ISDestroy(&wis)); 1243 PetscCall(ISRenumber(gwis,NULL,&cne,&wis)); 1244 PetscCall(ISLocalToGlobalMappingCreateIS(wis,&cel2g)); 1245 PetscCall(ISDestroy(&wis)); 1246 PetscCall(ISDestroy(&gwis)); 1247 1248 PetscCall(ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis)); 1249 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis)); 1250 PetscCall(ISDestroy(&wis)); 1251 PetscCall(ISRenumber(gwis,NULL,&cnv,&wis)); 1252 PetscCall(ISLocalToGlobalMappingCreateIS(wis,&cvl2g)); 1253 PetscCall(ISDestroy(&wis)); 1254 PetscCall(ISDestroy(&gwis)); 1255 1256 PetscCall(MatCreate(comm,&pcbddc->nedcG)); 1257 PetscCall(MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv)); 1258 PetscCall(MatSetType(pcbddc->nedcG,MATAIJ)); 1259 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL)); 1260 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL)); 1261 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g)); 1262 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1263 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1264 } 1265 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1266 1267 #if defined(PRINT_GDET) 1268 inc = 0; 1269 lev = pcbddc->current_level; 1270 #endif 1271 1272 /* Insert values in the change of basis matrix */ 1273 for (i=0;i<nee;i++) { 1274 Mat Gins = NULL, GKins = NULL; 1275 IS cornersis = NULL; 1276 PetscScalar cvals[2]; 1277 1278 if (pcbddc->nedcG) { 1279 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis)); 1280 } 1281 PetscCall(PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork)); 1282 if (Gins && GKins) { 1283 const PetscScalar *data; 1284 const PetscInt *rows,*cols; 1285 PetscInt nrh,nch,nrc,ncc; 1286 1287 PetscCall(ISGetIndices(eedges[i],&cols)); 1288 /* H1 */ 1289 PetscCall(ISGetIndices(extrows[i],&rows)); 1290 PetscCall(MatGetSize(Gins,&nrh,&nch)); 1291 PetscCall(MatDenseGetArrayRead(Gins,&data)); 1292 PetscCall(MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES)); 1293 PetscCall(MatDenseRestoreArrayRead(Gins,&data)); 1294 PetscCall(ISRestoreIndices(extrows[i],&rows)); 1295 /* complement */ 1296 PetscCall(MatGetSize(GKins,&nrc,&ncc)); 1297 PetscCheck(ncc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i); 1298 PetscCheckFalse(ncc + nch != nrc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i); 1299 PetscCheckFalse(ncc != 1 && pcbddc->nedcG,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc); 1300 PetscCall(MatDenseGetArrayRead(GKins,&data)); 1301 PetscCall(MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES)); 1302 PetscCall(MatDenseRestoreArrayRead(GKins,&data)); 1303 1304 /* coarse discrete gradient */ 1305 if (pcbddc->nedcG) { 1306 PetscInt cols[2]; 1307 1308 cols[0] = 2*i; 1309 cols[1] = 2*i+1; 1310 PetscCall(MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES)); 1311 } 1312 PetscCall(ISRestoreIndices(eedges[i],&cols)); 1313 } 1314 PetscCall(ISDestroy(&extrows[i])); 1315 PetscCall(ISDestroy(&extcols[i])); 1316 PetscCall(ISDestroy(&cornersis)); 1317 PetscCall(MatDestroy(&Gins)); 1318 PetscCall(MatDestroy(&GKins)); 1319 } 1320 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1321 1322 /* Start assembling */ 1323 PetscCall(MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY)); 1324 if (pcbddc->nedcG) { 1325 PetscCall(MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY)); 1326 } 1327 1328 /* Free */ 1329 if (fl2g) { 1330 PetscCall(ISDestroy(&primals)); 1331 for (i=0;i<nee;i++) { 1332 PetscCall(ISDestroy(&eedges[i])); 1333 } 1334 PetscCall(PetscFree(eedges)); 1335 } 1336 1337 /* hack mat_graph with primal dofs on the coarse edges */ 1338 { 1339 PCBDDCGraph graph = pcbddc->mat_graph; 1340 PetscInt *oqueue = graph->queue; 1341 PetscInt *ocptr = graph->cptr; 1342 PetscInt ncc,*idxs; 1343 1344 /* find first primal edge */ 1345 if (pcbddc->nedclocal) { 1346 PetscCall(ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs)); 1347 } else { 1348 if (fl2g) { 1349 PetscCall(ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges)); 1350 } 1351 idxs = cedges; 1352 } 1353 cum = 0; 1354 while (cum < nee && cedges[cum] < 0) cum++; 1355 1356 /* adapt connected components */ 1357 PetscCall(PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue)); 1358 graph->cptr[0] = 0; 1359 for (i=0,ncc=0;i<graph->ncc;i++) { 1360 PetscInt lc = ocptr[i+1]-ocptr[i]; 1361 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1362 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1363 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1364 ncc++; 1365 lc--; 1366 cum++; 1367 while (cum < nee && cedges[cum] < 0) cum++; 1368 } 1369 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1370 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1371 ncc++; 1372 } 1373 graph->ncc = ncc; 1374 if (pcbddc->nedclocal) { 1375 PetscCall(ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs)); 1376 } 1377 PetscCall(PetscFree2(ocptr,oqueue)); 1378 } 1379 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1380 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals)); 1381 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1382 PetscCall(MatDestroy(&conn)); 1383 1384 PetscCall(ISDestroy(&nedfieldlocal)); 1385 PetscCall(PetscFree(extrow)); 1386 PetscCall(PetscFree2(work,rwork)); 1387 PetscCall(PetscFree(corners)); 1388 PetscCall(PetscFree(cedges)); 1389 PetscCall(PetscFree(extrows)); 1390 PetscCall(PetscFree(extcols)); 1391 PetscCall(MatDestroy(&lG)); 1392 1393 /* Complete assembling */ 1394 PetscCall(MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY)); 1395 if (pcbddc->nedcG) { 1396 PetscCall(MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY)); 1397 #if 0 1398 PetscCall(PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G")); 1399 PetscCall(MatView(pcbddc->nedcG,NULL)); 1400 #endif 1401 } 1402 1403 /* set change of basis */ 1404 PetscCall(PCBDDCSetChangeOfBasisMat(pc,T,singular)); 1405 PetscCall(MatDestroy(&T)); 1406 1407 PetscFunctionReturn(0); 1408 } 1409 1410 /* the near-null space of BDDC carries information on quadrature weights, 1411 and these can be collinear -> so cheat with MatNullSpaceCreate 1412 and create a suitable set of basis vectors first */ 1413 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1414 { 1415 PetscInt i; 1416 1417 PetscFunctionBegin; 1418 for (i=0;i<nvecs;i++) { 1419 PetscInt first,last; 1420 1421 PetscCall(VecGetOwnershipRange(quad_vecs[i],&first,&last)); 1422 PetscCheckFalse(last-first < 2*nvecs && has_const,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1423 if (i>=first && i < last) { 1424 PetscScalar *data; 1425 PetscCall(VecGetArray(quad_vecs[i],&data)); 1426 if (!has_const) { 1427 data[i-first] = 1.; 1428 } else { 1429 data[2*i-first] = 1./PetscSqrtReal(2.); 1430 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1431 } 1432 PetscCall(VecRestoreArray(quad_vecs[i],&data)); 1433 } 1434 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1435 } 1436 PetscCall(MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp)); 1437 for (i=0;i<nvecs;i++) { /* reset vectors */ 1438 PetscInt first,last; 1439 PetscCall(VecLockReadPop(quad_vecs[i])); 1440 PetscCall(VecGetOwnershipRange(quad_vecs[i],&first,&last)); 1441 if (i>=first && i < last) { 1442 PetscScalar *data; 1443 PetscCall(VecGetArray(quad_vecs[i],&data)); 1444 if (!has_const) { 1445 data[i-first] = 0.; 1446 } else { 1447 data[2*i-first] = 0.; 1448 data[2*i-first+1] = 0.; 1449 } 1450 PetscCall(VecRestoreArray(quad_vecs[i],&data)); 1451 } 1452 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1453 PetscCall(VecLockReadPush(quad_vecs[i])); 1454 } 1455 PetscFunctionReturn(0); 1456 } 1457 1458 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1459 { 1460 Mat loc_divudotp; 1461 Vec p,v,vins,quad_vec,*quad_vecs; 1462 ISLocalToGlobalMapping map; 1463 PetscScalar *vals; 1464 const PetscScalar *array; 1465 PetscInt i,maxneighs = 0,maxsize,*gidxs; 1466 PetscInt n_neigh,*neigh,*n_shared,**shared; 1467 PetscMPIInt rank; 1468 1469 PetscFunctionBegin; 1470 PetscCall(ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1471 for (i=0;i<n_neigh;i++) maxneighs = PetscMax(graph->count[shared[i][0]]+1,maxneighs); 1472 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A))); 1473 if (!maxneighs) { 1474 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1475 *nnsp = NULL; 1476 PetscFunctionReturn(0); 1477 } 1478 maxsize = 0; 1479 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1480 PetscCall(PetscMalloc2(maxsize,&gidxs,maxsize,&vals)); 1481 /* create vectors to hold quadrature weights */ 1482 PetscCall(MatCreateVecs(A,&quad_vec,NULL)); 1483 if (!transpose) { 1484 PetscCall(MatISGetLocalToGlobalMapping(A,&map,NULL)); 1485 } else { 1486 PetscCall(MatISGetLocalToGlobalMapping(A,NULL,&map)); 1487 } 1488 PetscCall(VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs)); 1489 PetscCall(VecDestroy(&quad_vec)); 1490 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp)); 1491 for (i=0;i<maxneighs;i++) { 1492 PetscCall(VecLockReadPop(quad_vecs[i])); 1493 } 1494 1495 /* compute local quad vec */ 1496 PetscCall(MatISGetLocalMat(divudotp,&loc_divudotp)); 1497 if (!transpose) { 1498 PetscCall(MatCreateVecs(loc_divudotp,&v,&p)); 1499 } else { 1500 PetscCall(MatCreateVecs(loc_divudotp,&p,&v)); 1501 } 1502 PetscCall(VecSet(p,1.)); 1503 if (!transpose) { 1504 PetscCall(MatMultTranspose(loc_divudotp,p,v)); 1505 } else { 1506 PetscCall(MatMult(loc_divudotp,p,v)); 1507 } 1508 if (vl2l) { 1509 Mat lA; 1510 VecScatter sc; 1511 1512 PetscCall(MatISGetLocalMat(A,&lA)); 1513 PetscCall(MatCreateVecs(lA,&vins,NULL)); 1514 PetscCall(VecScatterCreate(v,NULL,vins,vl2l,&sc)); 1515 PetscCall(VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD)); 1516 PetscCall(VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD)); 1517 PetscCall(VecScatterDestroy(&sc)); 1518 } else { 1519 vins = v; 1520 } 1521 PetscCall(VecGetArrayRead(vins,&array)); 1522 PetscCall(VecDestroy(&p)); 1523 1524 /* insert in global quadrature vecs */ 1525 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 1526 for (i=1;i<n_neigh;i++) { 1527 const PetscInt *idxs; 1528 PetscInt idx,nn,j; 1529 1530 idxs = shared[i]; 1531 nn = n_shared[i]; 1532 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1533 PetscCall(PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx)); 1534 idx = -(idx+1); 1535 PetscCheckFalse(idx < 0 || idx >= maxneighs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid index %D not in [0,%D)",idx,maxneighs); 1536 PetscCall(ISLocalToGlobalMappingApply(map,nn,idxs,gidxs)); 1537 PetscCall(VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES)); 1538 } 1539 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared)); 1540 PetscCall(VecRestoreArrayRead(vins,&array)); 1541 if (vl2l) { 1542 PetscCall(VecDestroy(&vins)); 1543 } 1544 PetscCall(VecDestroy(&v)); 1545 PetscCall(PetscFree2(gidxs,vals)); 1546 1547 /* assemble near null space */ 1548 for (i=0;i<maxneighs;i++) { 1549 PetscCall(VecAssemblyBegin(quad_vecs[i])); 1550 } 1551 for (i=0;i<maxneighs;i++) { 1552 PetscCall(VecAssemblyEnd(quad_vecs[i])); 1553 PetscCall(VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view")); 1554 PetscCall(VecLockReadPush(quad_vecs[i])); 1555 } 1556 PetscCall(VecDestroyVecs(maxneighs,&quad_vecs)); 1557 PetscFunctionReturn(0); 1558 } 1559 1560 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1561 { 1562 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1563 1564 PetscFunctionBegin; 1565 if (primalv) { 1566 if (pcbddc->user_primal_vertices_local) { 1567 IS list[2], newp; 1568 1569 list[0] = primalv; 1570 list[1] = pcbddc->user_primal_vertices_local; 1571 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp)); 1572 PetscCall(ISSortRemoveDups(newp)); 1573 PetscCall(ISDestroy(&list[1])); 1574 pcbddc->user_primal_vertices_local = newp; 1575 } else { 1576 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc,primalv)); 1577 } 1578 } 1579 PetscFunctionReturn(0); 1580 } 1581 1582 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1583 { 1584 PetscInt f, *comp = (PetscInt *)ctx; 1585 1586 PetscFunctionBegin; 1587 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1588 PetscFunctionReturn(0); 1589 } 1590 1591 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1592 { 1593 PetscErrorCode ierr; 1594 Vec local,global; 1595 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1596 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1597 PetscBool monolithic = PETSC_FALSE; 1598 1599 PetscFunctionBegin; 1600 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");PetscCall(ierr); 1601 PetscCall(PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL)); 1602 ierr = PetscOptionsEnd();PetscCall(ierr); 1603 /* need to convert from global to local topology information and remove references to information in global ordering */ 1604 PetscCall(MatCreateVecs(pc->pmat,&global,NULL)); 1605 PetscCall(MatCreateVecs(matis->A,&local,NULL)); 1606 PetscCall(VecBindToCPU(global,PETSC_TRUE)); 1607 PetscCall(VecBindToCPU(local,PETSC_TRUE)); 1608 if (monolithic) { /* just get block size to properly compute vertices */ 1609 if (pcbddc->vertex_size == 1) { 1610 PetscCall(MatGetBlockSize(pc->pmat,&pcbddc->vertex_size)); 1611 } 1612 goto boundary; 1613 } 1614 1615 if (pcbddc->user_provided_isfordofs) { 1616 if (pcbddc->n_ISForDofs) { 1617 PetscInt i; 1618 1619 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal)); 1620 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1621 PetscInt bs; 1622 1623 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i])); 1624 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i],&bs)); 1625 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs)); 1626 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1627 } 1628 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1629 pcbddc->n_ISForDofs = 0; 1630 PetscCall(PetscFree(pcbddc->ISForDofs)); 1631 } 1632 } else { 1633 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1634 DM dm; 1635 1636 PetscCall(MatGetDM(pc->pmat, &dm)); 1637 if (!dm) { 1638 PetscCall(PCGetDM(pc, &dm)); 1639 } 1640 if (dm) { 1641 IS *fields; 1642 PetscInt nf,i; 1643 1644 PetscCall(DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL)); 1645 PetscCall(PetscMalloc1(nf,&pcbddc->ISForDofsLocal)); 1646 for (i=0;i<nf;i++) { 1647 PetscInt bs; 1648 1649 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i])); 1650 PetscCall(ISGetBlockSize(fields[i],&bs)); 1651 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs)); 1652 PetscCall(ISDestroy(&fields[i])); 1653 } 1654 PetscCall(PetscFree(fields)); 1655 pcbddc->n_ISForDofsLocal = nf; 1656 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1657 PetscContainer c; 1658 1659 PetscCall(PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c)); 1660 if (c) { 1661 MatISLocalFields lf; 1662 PetscCall(PetscContainerGetPointer(c,(void**)&lf)); 1663 PetscCall(PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf)); 1664 } else { /* fallback, create the default fields if bs > 1 */ 1665 PetscInt i, n = matis->A->rmap->n; 1666 PetscCall(MatGetBlockSize(pc->pmat,&i)); 1667 if (i > 1) { 1668 pcbddc->n_ISForDofsLocal = i; 1669 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal)); 1670 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1671 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i])); 1672 } 1673 } 1674 } 1675 } 1676 } else { 1677 PetscInt i; 1678 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1679 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i])); 1680 } 1681 } 1682 } 1683 1684 boundary: 1685 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1686 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal)); 1687 } else if (pcbddc->DirichletBoundariesLocal) { 1688 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal)); 1689 } 1690 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1691 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal)); 1692 } else if (pcbddc->NeumannBoundariesLocal) { 1693 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal)); 1694 } 1695 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1696 PetscCall(PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local)); 1697 } 1698 PetscCall(VecDestroy(&global)); 1699 PetscCall(VecDestroy(&local)); 1700 /* detect local disconnected subdomains if requested (use matis->A) */ 1701 if (pcbddc->detect_disconnected) { 1702 IS primalv = NULL; 1703 PetscInt i; 1704 PetscBool filter = pcbddc->detect_disconnected_filter; 1705 1706 for (i=0;i<pcbddc->n_local_subs;i++) { 1707 PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1708 } 1709 PetscCall(PetscFree(pcbddc->local_subs)); 1710 PetscCall(PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv)); 1711 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc,primalv)); 1712 PetscCall(ISDestroy(&primalv)); 1713 } 1714 /* early stage corner detection */ 1715 { 1716 DM dm; 1717 1718 PetscCall(MatGetDM(pc->pmat,&dm)); 1719 if (!dm) { 1720 PetscCall(PCGetDM(pc,&dm)); 1721 } 1722 if (dm) { 1723 PetscBool isda; 1724 1725 PetscCall(PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda)); 1726 if (isda) { 1727 ISLocalToGlobalMapping l2l; 1728 IS corners; 1729 Mat lA; 1730 PetscBool gl,lo; 1731 1732 { 1733 Vec cvec; 1734 const PetscScalar *coords; 1735 PetscInt dof,n,cdim; 1736 PetscBool memc = PETSC_TRUE; 1737 1738 PetscCall(DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); 1739 PetscCall(DMGetCoordinates(dm,&cvec)); 1740 PetscCall(VecGetLocalSize(cvec,&n)); 1741 PetscCall(VecGetBlockSize(cvec,&cdim)); 1742 n /= cdim; 1743 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1744 PetscCall(PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords)); 1745 PetscCall(VecGetArrayRead(cvec,&coords)); 1746 #if defined(PETSC_USE_COMPLEX) 1747 memc = PETSC_FALSE; 1748 #endif 1749 if (dof != 1) memc = PETSC_FALSE; 1750 if (memc) { 1751 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof)); 1752 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1753 PetscReal *bcoords = pcbddc->mat_graph->coords; 1754 PetscInt i, b, d; 1755 1756 for (i=0;i<n;i++) { 1757 for (b=0;b<dof;b++) { 1758 for (d=0;d<cdim;d++) { 1759 bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]); 1760 } 1761 } 1762 } 1763 } 1764 PetscCall(VecRestoreArrayRead(cvec,&coords)); 1765 pcbddc->mat_graph->cdim = cdim; 1766 pcbddc->mat_graph->cnloc = dof*n; 1767 pcbddc->mat_graph->cloc = PETSC_FALSE; 1768 } 1769 PetscCall(DMDAGetSubdomainCornersIS(dm,&corners)); 1770 PetscCall(MatISGetLocalMat(pc->pmat,&lA)); 1771 PetscCall(MatGetLocalToGlobalMapping(lA,&l2l,NULL)); 1772 PetscCall(MatISRestoreLocalMat(pc->pmat,&lA)); 1773 lo = (PetscBool)(l2l && corners); 1774 PetscCallMPI(MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc))); 1775 if (gl) { /* From PETSc's DMDA */ 1776 const PetscInt *idx; 1777 PetscInt dof,bs,*idxout,n; 1778 1779 PetscCall(DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); 1780 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l,&bs)); 1781 PetscCall(ISGetLocalSize(corners,&n)); 1782 PetscCall(ISGetIndices(corners,&idx)); 1783 if (bs == dof) { 1784 PetscCall(PetscMalloc1(n,&idxout)); 1785 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout)); 1786 } else { /* the original DMDA local-to-local map have been modified */ 1787 PetscInt i,d; 1788 1789 PetscCall(PetscMalloc1(dof*n,&idxout)); 1790 for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d; 1791 PetscCall(ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout)); 1792 1793 bs = 1; 1794 n *= dof; 1795 } 1796 PetscCall(ISRestoreIndices(corners,&idx)); 1797 PetscCall(DMDARestoreSubdomainCornersIS(dm,&corners)); 1798 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners)); 1799 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc,corners)); 1800 PetscCall(ISDestroy(&corners)); 1801 pcbddc->corner_selected = PETSC_TRUE; 1802 pcbddc->corner_selection = PETSC_TRUE; 1803 } 1804 if (corners) { 1805 PetscCall(DMDARestoreSubdomainCornersIS(dm,&corners)); 1806 } 1807 } 1808 } 1809 } 1810 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1811 DM dm; 1812 1813 PetscCall(MatGetDM(pc->pmat,&dm)); 1814 if (!dm) { 1815 PetscCall(PCGetDM(pc,&dm)); 1816 } 1817 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1818 Vec vcoords; 1819 PetscSection section; 1820 PetscReal *coords; 1821 PetscInt d,cdim,nl,nf,**ctxs; 1822 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1823 1824 PetscCall(DMGetCoordinateDim(dm,&cdim)); 1825 PetscCall(DMGetLocalSection(dm,§ion)); 1826 PetscCall(PetscSectionGetNumFields(section,&nf)); 1827 PetscCall(DMCreateGlobalVector(dm,&vcoords)); 1828 PetscCall(VecGetLocalSize(vcoords,&nl)); 1829 PetscCall(PetscMalloc1(nl*cdim,&coords)); 1830 PetscCall(PetscMalloc2(nf,&funcs,nf,&ctxs)); 1831 PetscCall(PetscMalloc1(nf,&ctxs[0])); 1832 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1833 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1834 for (d=0;d<cdim;d++) { 1835 PetscInt i; 1836 const PetscScalar *v; 1837 1838 for (i=0;i<nf;i++) ctxs[i][0] = d; 1839 PetscCall(DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords)); 1840 PetscCall(VecGetArrayRead(vcoords,&v)); 1841 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1842 PetscCall(VecRestoreArrayRead(vcoords,&v)); 1843 } 1844 PetscCall(VecDestroy(&vcoords)); 1845 PetscCall(PCSetCoordinates(pc,cdim,nl,coords)); 1846 PetscCall(PetscFree(coords)); 1847 PetscCall(PetscFree(ctxs[0])); 1848 PetscCall(PetscFree2(funcs,ctxs)); 1849 } 1850 } 1851 PetscFunctionReturn(0); 1852 } 1853 1854 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1855 { 1856 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1857 IS nis; 1858 const PetscInt *idxs; 1859 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1860 1861 PetscFunctionBegin; 1862 PetscCheckFalse(mop != MPI_LAND && mop != MPI_LOR,PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1863 if (mop == MPI_LAND) { 1864 /* init rootdata with true */ 1865 for (i=0;i<pc->pmat->rmap->n;i++) matis->sf_rootdata[i] = 1; 1866 } else { 1867 PetscCall(PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n)); 1868 } 1869 PetscCall(PetscArrayzero(matis->sf_leafdata,n)); 1870 PetscCall(ISGetLocalSize(*is,&nd)); 1871 PetscCall(ISGetIndices(*is,&idxs)); 1872 for (i=0;i<nd;i++) 1873 if (-1 < idxs[i] && idxs[i] < n) 1874 matis->sf_leafdata[idxs[i]] = 1; 1875 PetscCall(ISRestoreIndices(*is,&idxs)); 1876 PetscCall(PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop)); 1877 PetscCall(PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop)); 1878 PetscCall(PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 1879 PetscCall(PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE)); 1880 if (mop == MPI_LAND) { 1881 PetscCall(PetscMalloc1(nd,&nidxs)); 1882 } else { 1883 PetscCall(PetscMalloc1(n,&nidxs)); 1884 } 1885 for (i=0,nnd=0;i<n;i++) 1886 if (matis->sf_leafdata[i]) 1887 nidxs[nnd++] = i; 1888 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis)); 1889 PetscCall(ISDestroy(is)); 1890 *is = nis; 1891 PetscFunctionReturn(0); 1892 } 1893 1894 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1895 { 1896 PC_IS *pcis = (PC_IS*)(pc->data); 1897 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1898 1899 PetscFunctionBegin; 1900 if (!pcbddc->benign_have_null) { 1901 PetscFunctionReturn(0); 1902 } 1903 if (pcbddc->ChangeOfBasisMatrix) { 1904 Vec swap; 1905 1906 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change)); 1907 swap = pcbddc->work_change; 1908 pcbddc->work_change = r; 1909 r = swap; 1910 } 1911 PetscCall(VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD)); 1912 PetscCall(VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD)); 1913 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0)); 1914 PetscCall(KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D)); 1915 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0)); 1916 PetscCall(KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D)); 1917 PetscCall(VecSet(z,0.)); 1918 PetscCall(VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE)); 1919 PetscCall(VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE)); 1920 if (pcbddc->ChangeOfBasisMatrix) { 1921 pcbddc->work_change = r; 1922 PetscCall(VecCopy(z,pcbddc->work_change)); 1923 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z)); 1924 } 1925 PetscFunctionReturn(0); 1926 } 1927 1928 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1929 { 1930 PCBDDCBenignMatMult_ctx ctx; 1931 PetscBool apply_right,apply_left,reset_x; 1932 1933 PetscFunctionBegin; 1934 PetscCall(MatShellGetContext(A,&ctx)); 1935 if (transpose) { 1936 apply_right = ctx->apply_left; 1937 apply_left = ctx->apply_right; 1938 } else { 1939 apply_right = ctx->apply_right; 1940 apply_left = ctx->apply_left; 1941 } 1942 reset_x = PETSC_FALSE; 1943 if (apply_right) { 1944 const PetscScalar *ax; 1945 PetscInt nl,i; 1946 1947 PetscCall(VecGetLocalSize(x,&nl)); 1948 PetscCall(VecGetArrayRead(x,&ax)); 1949 PetscCall(PetscArraycpy(ctx->work,ax,nl)); 1950 PetscCall(VecRestoreArrayRead(x,&ax)); 1951 for (i=0;i<ctx->benign_n;i++) { 1952 PetscScalar sum,val; 1953 const PetscInt *idxs; 1954 PetscInt nz,j; 1955 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz)); 1956 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1957 sum = 0.; 1958 if (ctx->apply_p0) { 1959 val = ctx->work[idxs[nz-1]]; 1960 for (j=0;j<nz-1;j++) { 1961 sum += ctx->work[idxs[j]]; 1962 ctx->work[idxs[j]] += val; 1963 } 1964 } else { 1965 for (j=0;j<nz-1;j++) { 1966 sum += ctx->work[idxs[j]]; 1967 } 1968 } 1969 ctx->work[idxs[nz-1]] -= sum; 1970 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1971 } 1972 PetscCall(VecPlaceArray(x,ctx->work)); 1973 reset_x = PETSC_TRUE; 1974 } 1975 if (transpose) { 1976 PetscCall(MatMultTranspose(ctx->A,x,y)); 1977 } else { 1978 PetscCall(MatMult(ctx->A,x,y)); 1979 } 1980 if (reset_x) { 1981 PetscCall(VecResetArray(x)); 1982 } 1983 if (apply_left) { 1984 PetscScalar *ay; 1985 PetscInt i; 1986 1987 PetscCall(VecGetArray(y,&ay)); 1988 for (i=0;i<ctx->benign_n;i++) { 1989 PetscScalar sum,val; 1990 const PetscInt *idxs; 1991 PetscInt nz,j; 1992 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz)); 1993 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs)); 1994 val = -ay[idxs[nz-1]]; 1995 if (ctx->apply_p0) { 1996 sum = 0.; 1997 for (j=0;j<nz-1;j++) { 1998 sum += ay[idxs[j]]; 1999 ay[idxs[j]] += val; 2000 } 2001 ay[idxs[nz-1]] += sum; 2002 } else { 2003 for (j=0;j<nz-1;j++) { 2004 ay[idxs[j]] += val; 2005 } 2006 ay[idxs[nz-1]] = 0.; 2007 } 2008 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs)); 2009 } 2010 PetscCall(VecRestoreArray(y,&ay)); 2011 } 2012 PetscFunctionReturn(0); 2013 } 2014 2015 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2016 { 2017 PetscFunctionBegin; 2018 PetscCall(PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE)); 2019 PetscFunctionReturn(0); 2020 } 2021 2022 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2023 { 2024 PetscFunctionBegin; 2025 PetscCall(PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE)); 2026 PetscFunctionReturn(0); 2027 } 2028 2029 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2030 { 2031 PC_IS *pcis = (PC_IS*)pc->data; 2032 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2033 PCBDDCBenignMatMult_ctx ctx; 2034 2035 PetscFunctionBegin; 2036 if (!restore) { 2037 Mat A_IB,A_BI; 2038 PetscScalar *work; 2039 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2040 2041 PetscCheck(!pcbddc->benign_original_mat,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2042 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2043 PetscCall(PetscMalloc1(pcis->n,&work)); 2044 PetscCall(MatCreate(PETSC_COMM_SELF,&A_IB)); 2045 PetscCall(MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE)); 2046 PetscCall(MatSetType(A_IB,MATSHELL)); 2047 PetscCall(MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private)); 2048 PetscCall(MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private)); 2049 PetscCall(PetscNew(&ctx)); 2050 PetscCall(MatShellSetContext(A_IB,ctx)); 2051 ctx->apply_left = PETSC_TRUE; 2052 ctx->apply_right = PETSC_FALSE; 2053 ctx->apply_p0 = PETSC_FALSE; 2054 ctx->benign_n = pcbddc->benign_n; 2055 if (reuse) { 2056 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2057 ctx->free = PETSC_FALSE; 2058 } else { /* TODO: could be optimized for successive solves */ 2059 ISLocalToGlobalMapping N_to_D; 2060 PetscInt i; 2061 2062 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D)); 2063 PetscCall(PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs)); 2064 for (i=0;i<pcbddc->benign_n;i++) { 2065 PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i])); 2066 } 2067 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 2068 ctx->free = PETSC_TRUE; 2069 } 2070 ctx->A = pcis->A_IB; 2071 ctx->work = work; 2072 PetscCall(MatSetUp(A_IB)); 2073 PetscCall(MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY)); 2074 PetscCall(MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY)); 2075 pcis->A_IB = A_IB; 2076 2077 /* A_BI as A_IB^T */ 2078 PetscCall(MatCreateTranspose(A_IB,&A_BI)); 2079 pcbddc->benign_original_mat = pcis->A_BI; 2080 pcis->A_BI = A_BI; 2081 } else { 2082 if (!pcbddc->benign_original_mat) { 2083 PetscFunctionReturn(0); 2084 } 2085 PetscCall(MatShellGetContext(pcis->A_IB,&ctx)); 2086 PetscCall(MatDestroy(&pcis->A_IB)); 2087 pcis->A_IB = ctx->A; 2088 ctx->A = NULL; 2089 PetscCall(MatDestroy(&pcis->A_BI)); 2090 pcis->A_BI = pcbddc->benign_original_mat; 2091 pcbddc->benign_original_mat = NULL; 2092 if (ctx->free) { 2093 PetscInt i; 2094 for (i=0;i<ctx->benign_n;i++) { 2095 PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2096 } 2097 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2098 } 2099 PetscCall(PetscFree(ctx->work)); 2100 PetscCall(PetscFree(ctx)); 2101 } 2102 PetscFunctionReturn(0); 2103 } 2104 2105 /* used just in bddc debug mode */ 2106 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2107 { 2108 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2109 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2110 Mat An; 2111 2112 PetscFunctionBegin; 2113 PetscCall(MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An)); 2114 PetscCall(MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL)); 2115 if (is1) { 2116 PetscCall(MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B)); 2117 PetscCall(MatDestroy(&An)); 2118 } else { 2119 *B = An; 2120 } 2121 PetscFunctionReturn(0); 2122 } 2123 2124 /* TODO: add reuse flag */ 2125 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2126 { 2127 Mat Bt; 2128 PetscScalar *a,*bdata; 2129 const PetscInt *ii,*ij; 2130 PetscInt m,n,i,nnz,*bii,*bij; 2131 PetscBool flg_row; 2132 2133 PetscFunctionBegin; 2134 PetscCall(MatGetSize(A,&n,&m)); 2135 PetscCall(MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row)); 2136 PetscCall(MatSeqAIJGetArray(A,&a)); 2137 nnz = n; 2138 for (i=0;i<ii[n];i++) { 2139 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2140 } 2141 PetscCall(PetscMalloc1(n+1,&bii)); 2142 PetscCall(PetscMalloc1(nnz,&bij)); 2143 PetscCall(PetscMalloc1(nnz,&bdata)); 2144 nnz = 0; 2145 bii[0] = 0; 2146 for (i=0;i<n;i++) { 2147 PetscInt j; 2148 for (j=ii[i];j<ii[i+1];j++) { 2149 PetscScalar entry = a[j]; 2150 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2151 bij[nnz] = ij[j]; 2152 bdata[nnz] = entry; 2153 nnz++; 2154 } 2155 } 2156 bii[i+1] = nnz; 2157 } 2158 PetscCall(MatSeqAIJRestoreArray(A,&a)); 2159 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt)); 2160 PetscCall(MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row)); 2161 { 2162 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2163 b->free_a = PETSC_TRUE; 2164 b->free_ij = PETSC_TRUE; 2165 } 2166 if (*B == A) { 2167 PetscCall(MatDestroy(&A)); 2168 } 2169 *B = Bt; 2170 PetscFunctionReturn(0); 2171 } 2172 2173 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv) 2174 { 2175 Mat B = NULL; 2176 DM dm; 2177 IS is_dummy,*cc_n; 2178 ISLocalToGlobalMapping l2gmap_dummy; 2179 PCBDDCGraph graph; 2180 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2181 PetscInt i,n; 2182 PetscInt *xadj,*adjncy; 2183 PetscBool isplex = PETSC_FALSE; 2184 2185 PetscFunctionBegin; 2186 if (ncc) *ncc = 0; 2187 if (cc) *cc = NULL; 2188 if (primalv) *primalv = NULL; 2189 PetscCall(PCBDDCGraphCreate(&graph)); 2190 PetscCall(MatGetDM(pc->pmat,&dm)); 2191 if (!dm) { 2192 PetscCall(PCGetDM(pc,&dm)); 2193 } 2194 if (dm) { 2195 PetscCall(PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex)); 2196 } 2197 if (filter) isplex = PETSC_FALSE; 2198 2199 if (isplex) { /* this code has been modified from plexpartition.c */ 2200 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2201 PetscInt *adj = NULL; 2202 IS cellNumbering; 2203 const PetscInt *cellNum; 2204 PetscBool useCone, useClosure; 2205 PetscSection section; 2206 PetscSegBuffer adjBuffer; 2207 PetscSF sfPoint; 2208 2209 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2210 PetscCall(DMGetPointSF(dm, &sfPoint)); 2211 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2212 /* Build adjacency graph via a section/segbuffer */ 2213 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion)); 2214 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2215 PetscCall(PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer)); 2216 /* Always use FVM adjacency to create partitioner graph */ 2217 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2218 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2219 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2220 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2221 for (n = 0, p = pStart; p < pEnd; p++) { 2222 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2223 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2224 adjSize = PETSC_DETERMINE; 2225 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2226 for (a = 0; a < adjSize; ++a) { 2227 const PetscInt point = adj[a]; 2228 if (pStart <= point && point < pEnd) { 2229 PetscInt *PETSC_RESTRICT pBuf; 2230 PetscCall(PetscSectionAddDof(section, p, 1)); 2231 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2232 *pBuf = point; 2233 } 2234 } 2235 n++; 2236 } 2237 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2238 /* Derive CSR graph from section/segbuffer */ 2239 PetscCall(PetscSectionSetUp(section)); 2240 PetscCall(PetscSectionGetStorageSize(section, &size)); 2241 PetscCall(PetscMalloc1(n+1, &xadj)); 2242 for (idx = 0, p = pStart; p < pEnd; p++) { 2243 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2244 PetscCall(PetscSectionGetOffset(section, p, &(xadj[idx++]))); 2245 } 2246 xadj[n] = size; 2247 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2248 /* Clean up */ 2249 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2250 PetscCall(PetscSectionDestroy(§ion)); 2251 PetscCall(PetscFree(adj)); 2252 graph->xadj = xadj; 2253 graph->adjncy = adjncy; 2254 } else { 2255 Mat A; 2256 PetscBool isseqaij, flg_row; 2257 2258 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2259 if (!A->rmap->N || !A->cmap->N) { 2260 PetscCall(PCBDDCGraphDestroy(&graph)); 2261 PetscFunctionReturn(0); 2262 } 2263 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij)); 2264 if (!isseqaij && filter) { 2265 PetscBool isseqdense; 2266 2267 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense)); 2268 if (!isseqdense) { 2269 PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B)); 2270 } else { /* TODO: rectangular case and LDA */ 2271 PetscScalar *array; 2272 PetscReal chop=1.e-6; 2273 2274 PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 2275 PetscCall(MatDenseGetArray(B,&array)); 2276 PetscCall(MatGetSize(B,&n,NULL)); 2277 for (i=0;i<n;i++) { 2278 PetscInt j; 2279 for (j=i+1;j<n;j++) { 2280 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2281 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2282 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2283 } 2284 } 2285 PetscCall(MatDenseRestoreArray(B,&array)); 2286 PetscCall(MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B)); 2287 } 2288 } else { 2289 PetscCall(PetscObjectReference((PetscObject)A)); 2290 B = A; 2291 } 2292 PetscCall(MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 2293 2294 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2295 if (filter) { 2296 PetscScalar *data; 2297 PetscInt j,cum; 2298 2299 PetscCall(PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered)); 2300 PetscCall(MatSeqAIJGetArray(B,&data)); 2301 cum = 0; 2302 for (i=0;i<n;i++) { 2303 PetscInt t; 2304 2305 for (j=xadj[i];j<xadj[i+1];j++) { 2306 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2307 continue; 2308 } 2309 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2310 } 2311 t = xadj_filtered[i]; 2312 xadj_filtered[i] = cum; 2313 cum += t; 2314 } 2315 PetscCall(MatSeqAIJRestoreArray(B,&data)); 2316 graph->xadj = xadj_filtered; 2317 graph->adjncy = adjncy_filtered; 2318 } else { 2319 graph->xadj = xadj; 2320 graph->adjncy = adjncy; 2321 } 2322 } 2323 /* compute local connected components using PCBDDCGraph */ 2324 PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy)); 2325 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy)); 2326 PetscCall(ISDestroy(&is_dummy)); 2327 PetscCall(PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT)); 2328 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2329 PetscCall(PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL)); 2330 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2331 2332 /* partial clean up */ 2333 PetscCall(PetscFree2(xadj_filtered,adjncy_filtered)); 2334 if (B) { 2335 PetscBool flg_row; 2336 PetscCall(MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 2337 PetscCall(MatDestroy(&B)); 2338 } 2339 if (isplex) { 2340 PetscCall(PetscFree(xadj)); 2341 PetscCall(PetscFree(adjncy)); 2342 } 2343 2344 /* get back data */ 2345 if (isplex) { 2346 if (ncc) *ncc = graph->ncc; 2347 if (cc || primalv) { 2348 Mat A; 2349 PetscBT btv,btvt; 2350 PetscSection subSection; 2351 PetscInt *ids,cum,cump,*cids,*pids; 2352 2353 PetscCall(DMPlexGetSubdomainSection(dm,&subSection)); 2354 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2355 PetscCall(PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids)); 2356 PetscCall(PetscBTCreate(A->rmap->n,&btv)); 2357 PetscCall(PetscBTCreate(A->rmap->n,&btvt)); 2358 2359 cids[0] = 0; 2360 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2361 PetscInt j; 2362 2363 PetscCall(PetscBTMemzero(A->rmap->n,btvt)); 2364 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2365 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2366 2367 PetscCall(DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure)); 2368 for (k = 0; k < 2*size; k += 2) { 2369 PetscInt s, pp, p = closure[k], off, dof, cdof; 2370 2371 PetscCall(PetscSectionGetConstraintDof(subSection,p,&cdof)); 2372 PetscCall(PetscSectionGetOffset(subSection,p,&off)); 2373 PetscCall(PetscSectionGetDof(subSection,p,&dof)); 2374 for (s = 0; s < dof-cdof; s++) { 2375 if (PetscBTLookupSet(btvt,off+s)) continue; 2376 if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s; 2377 else pids[cump++] = off+s; /* cross-vertex */ 2378 } 2379 PetscCall(DMPlexGetTreeParent(dm,p,&pp,NULL)); 2380 if (pp != p) { 2381 PetscCall(PetscSectionGetConstraintDof(subSection,pp,&cdof)); 2382 PetscCall(PetscSectionGetOffset(subSection,pp,&off)); 2383 PetscCall(PetscSectionGetDof(subSection,pp,&dof)); 2384 for (s = 0; s < dof-cdof; s++) { 2385 if (PetscBTLookupSet(btvt,off+s)) continue; 2386 if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s; 2387 else pids[cump++] = off+s; /* cross-vertex */ 2388 } 2389 } 2390 } 2391 PetscCall(DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure)); 2392 } 2393 cids[i+1] = cum; 2394 /* mark dofs as already assigned */ 2395 for (j = cids[i]; j < cids[i+1]; j++) { 2396 PetscCall(PetscBTSet(btv,ids[j])); 2397 } 2398 } 2399 if (cc) { 2400 PetscCall(PetscMalloc1(graph->ncc,&cc_n)); 2401 for (i = 0; i < graph->ncc; i++) { 2402 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i])); 2403 } 2404 *cc = cc_n; 2405 } 2406 if (primalv) { 2407 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv)); 2408 } 2409 PetscCall(PetscFree3(ids,cids,pids)); 2410 PetscCall(PetscBTDestroy(&btv)); 2411 PetscCall(PetscBTDestroy(&btvt)); 2412 } 2413 } else { 2414 if (ncc) *ncc = graph->ncc; 2415 if (cc) { 2416 PetscCall(PetscMalloc1(graph->ncc,&cc_n)); 2417 for (i=0;i<graph->ncc;i++) { 2418 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i])); 2419 } 2420 *cc = cc_n; 2421 } 2422 } 2423 /* clean up graph */ 2424 graph->xadj = NULL; 2425 graph->adjncy = NULL; 2426 PetscCall(PCBDDCGraphDestroy(&graph)); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2431 { 2432 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2433 PC_IS* pcis = (PC_IS*)(pc->data); 2434 IS dirIS = NULL; 2435 PetscInt i; 2436 2437 PetscFunctionBegin; 2438 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS)); 2439 if (zerodiag) { 2440 Mat A; 2441 Vec vec3_N; 2442 PetscScalar *vals; 2443 const PetscInt *idxs; 2444 PetscInt nz,*count; 2445 2446 /* p0 */ 2447 PetscCall(VecSet(pcis->vec1_N,0.)); 2448 PetscCall(PetscMalloc1(pcis->n,&vals)); 2449 PetscCall(ISGetLocalSize(zerodiag,&nz)); 2450 PetscCall(ISGetIndices(zerodiag,&idxs)); 2451 for (i=0;i<nz;i++) vals[i] = 1.; 2452 PetscCall(VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES)); 2453 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2454 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2455 /* v_I */ 2456 PetscCall(VecSetRandom(pcis->vec2_N,NULL)); 2457 for (i=0;i<nz;i++) vals[i] = 0.; 2458 PetscCall(VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES)); 2459 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2460 PetscCall(ISGetIndices(pcis->is_B_local,&idxs)); 2461 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2462 PetscCall(VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES)); 2463 PetscCall(ISRestoreIndices(pcis->is_B_local,&idxs)); 2464 if (dirIS) { 2465 PetscInt n; 2466 2467 PetscCall(ISGetLocalSize(dirIS,&n)); 2468 PetscCall(ISGetIndices(dirIS,&idxs)); 2469 for (i=0;i<n;i++) vals[i] = 0.; 2470 PetscCall(VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES)); 2471 PetscCall(ISRestoreIndices(dirIS,&idxs)); 2472 } 2473 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2474 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2475 PetscCall(VecDuplicate(pcis->vec1_N,&vec3_N)); 2476 PetscCall(VecSet(vec3_N,0.)); 2477 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2478 PetscCall(MatMult(A,pcis->vec1_N,vec3_N)); 2479 PetscCall(VecDot(vec3_N,pcis->vec2_N,&vals[0])); 2480 PetscCheckFalse(PetscAbsScalar(vals[0]) > 1.e-1,PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2481 PetscCall(PetscFree(vals)); 2482 PetscCall(VecDestroy(&vec3_N)); 2483 2484 /* there should not be any pressure dofs lying on the interface */ 2485 PetscCall(PetscCalloc1(pcis->n,&count)); 2486 PetscCall(ISGetIndices(pcis->is_B_local,&idxs)); 2487 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2488 PetscCall(ISRestoreIndices(pcis->is_B_local,&idxs)); 2489 PetscCall(ISGetIndices(zerodiag,&idxs)); 2490 for (i=0;i<nz;i++) PetscCheckFalse(count[idxs[i]],PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]); 2491 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2492 PetscCall(PetscFree(count)); 2493 } 2494 PetscCall(ISDestroy(&dirIS)); 2495 2496 /* check PCBDDCBenignGetOrSetP0 */ 2497 PetscCall(VecSetRandom(pcis->vec1_global,NULL)); 2498 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2499 PetscCall(PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE)); 2500 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2501 PetscCall(PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE)); 2502 for (i=0;i<pcbddc->benign_n;i++) { 2503 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2504 PetscCheckFalse(val != -PetscGlobalRank-i,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2505 } 2506 PetscFunctionReturn(0); 2507 } 2508 2509 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2510 { 2511 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2512 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2513 IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs; 2514 PetscInt nz,n,benign_n,bsp = 1; 2515 PetscInt *interior_dofs,n_interior_dofs,nneu; 2516 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2517 PetscErrorCode ierr; 2518 2519 PetscFunctionBegin; 2520 if (reuse) goto project_b0; 2521 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2522 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2523 for (n=0;n<pcbddc->benign_n;n++) { 2524 PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2525 } 2526 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2527 has_null_pressures = PETSC_TRUE; 2528 have_null = PETSC_TRUE; 2529 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2530 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2531 Checks if all the pressure dofs in each subdomain have a zero diagonal 2532 If not, a change of basis on pressures is not needed 2533 since the local Schur complements are already SPD 2534 */ 2535 if (pcbddc->n_ISForDofsLocal) { 2536 IS iP = NULL; 2537 PetscInt p,*pp; 2538 PetscBool flg; 2539 2540 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp)); 2541 n = pcbddc->n_ISForDofsLocal; 2542 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");PetscCall(ierr); 2543 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg)); 2544 ierr = PetscOptionsEnd();PetscCall(ierr); 2545 if (!flg) { 2546 n = 1; 2547 pp[0] = pcbddc->n_ISForDofsLocal-1; 2548 } 2549 2550 bsp = 0; 2551 for (p=0;p<n;p++) { 2552 PetscInt bs; 2553 2554 PetscCheckFalse(pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1,PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]); 2555 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs)); 2556 bsp += bs; 2557 } 2558 PetscCall(PetscMalloc1(bsp,&bzerodiag)); 2559 bsp = 0; 2560 for (p=0;p<n;p++) { 2561 const PetscInt *idxs; 2562 PetscInt b,bs,npl,*bidxs; 2563 2564 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs)); 2565 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl)); 2566 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs)); 2567 PetscCall(PetscMalloc1(npl/bs,&bidxs)); 2568 for (b=0;b<bs;b++) { 2569 PetscInt i; 2570 2571 for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b]; 2572 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp])); 2573 bsp++; 2574 } 2575 PetscCall(PetscFree(bidxs)); 2576 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs)); 2577 } 2578 PetscCall(ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures)); 2579 2580 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2581 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP)); 2582 if (iP) { 2583 IS newpressures; 2584 2585 PetscCall(ISDifference(pressures,iP,&newpressures)); 2586 PetscCall(ISDestroy(&pressures)); 2587 pressures = newpressures; 2588 } 2589 PetscCall(ISSorted(pressures,&sorted)); 2590 if (!sorted) { 2591 PetscCall(ISSort(pressures)); 2592 } 2593 PetscCall(PetscFree(pp)); 2594 } 2595 2596 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2597 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2598 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2599 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag)); 2600 PetscCall(ISSorted(zerodiag,&sorted)); 2601 if (!sorted) { 2602 PetscCall(ISSort(zerodiag)); 2603 } 2604 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2605 zerodiag_save = zerodiag; 2606 PetscCall(ISGetLocalSize(zerodiag,&nz)); 2607 if (!nz) { 2608 if (n) have_null = PETSC_FALSE; 2609 has_null_pressures = PETSC_FALSE; 2610 PetscCall(ISDestroy(&zerodiag)); 2611 } 2612 recompute_zerodiag = PETSC_FALSE; 2613 2614 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2615 zerodiag_subs = NULL; 2616 benign_n = 0; 2617 n_interior_dofs = 0; 2618 interior_dofs = NULL; 2619 nneu = 0; 2620 if (pcbddc->NeumannBoundariesLocal) { 2621 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu)); 2622 } 2623 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2624 if (checkb) { /* need to compute interior nodes */ 2625 PetscInt n,i,j; 2626 PetscInt n_neigh,*neigh,*n_shared,**shared; 2627 PetscInt *iwork; 2628 2629 PetscCall(ISLocalToGlobalMappingGetSize(matis->rmapping,&n)); 2630 PetscCall(ISLocalToGlobalMappingGetInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared)); 2631 PetscCall(PetscCalloc1(n,&iwork)); 2632 PetscCall(PetscMalloc1(n,&interior_dofs)); 2633 for (i=1;i<n_neigh;i++) 2634 for (j=0;j<n_shared[i];j++) 2635 iwork[shared[i][j]] += 1; 2636 for (i=0;i<n;i++) 2637 if (!iwork[i]) 2638 interior_dofs[n_interior_dofs++] = i; 2639 PetscCall(PetscFree(iwork)); 2640 PetscCall(ISLocalToGlobalMappingRestoreInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared)); 2641 } 2642 if (has_null_pressures) { 2643 IS *subs; 2644 PetscInt nsubs,i,j,nl; 2645 const PetscInt *idxs; 2646 PetscScalar *array; 2647 Vec *work; 2648 2649 subs = pcbddc->local_subs; 2650 nsubs = pcbddc->n_local_subs; 2651 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2652 if (checkb) { 2653 PetscCall(VecDuplicateVecs(matis->y,2,&work)); 2654 PetscCall(ISGetLocalSize(zerodiag,&nl)); 2655 PetscCall(ISGetIndices(zerodiag,&idxs)); 2656 /* work[0] = 1_p */ 2657 PetscCall(VecSet(work[0],0.)); 2658 PetscCall(VecGetArray(work[0],&array)); 2659 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2660 PetscCall(VecRestoreArray(work[0],&array)); 2661 /* work[0] = 1_v */ 2662 PetscCall(VecSet(work[1],1.)); 2663 PetscCall(VecGetArray(work[1],&array)); 2664 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2665 PetscCall(VecRestoreArray(work[1],&array)); 2666 PetscCall(ISRestoreIndices(zerodiag,&idxs)); 2667 } 2668 2669 if (nsubs > 1 || bsp > 1) { 2670 IS *is; 2671 PetscInt b,totb; 2672 2673 totb = bsp; 2674 is = bsp > 1 ? bzerodiag : &zerodiag; 2675 nsubs = PetscMax(nsubs,1); 2676 PetscCall(PetscCalloc1(nsubs*totb,&zerodiag_subs)); 2677 for (b=0;b<totb;b++) { 2678 for (i=0;i<nsubs;i++) { 2679 ISLocalToGlobalMapping l2g; 2680 IS t_zerodiag_subs; 2681 PetscInt nl; 2682 2683 if (subs) { 2684 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i],&l2g)); 2685 } else { 2686 IS tis; 2687 2688 PetscCall(MatGetLocalSize(pcbddc->local_mat,&nl,NULL)); 2689 PetscCall(ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis)); 2690 PetscCall(ISLocalToGlobalMappingCreateIS(tis,&l2g)); 2691 PetscCall(ISDestroy(&tis)); 2692 } 2693 PetscCall(ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs)); 2694 PetscCall(ISGetLocalSize(t_zerodiag_subs,&nl)); 2695 if (nl) { 2696 PetscBool valid = PETSC_TRUE; 2697 2698 if (checkb) { 2699 PetscCall(VecSet(matis->x,0)); 2700 PetscCall(ISGetLocalSize(subs[i],&nl)); 2701 PetscCall(ISGetIndices(subs[i],&idxs)); 2702 PetscCall(VecGetArray(matis->x,&array)); 2703 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2704 PetscCall(VecRestoreArray(matis->x,&array)); 2705 PetscCall(ISRestoreIndices(subs[i],&idxs)); 2706 PetscCall(VecPointwiseMult(matis->x,work[0],matis->x)); 2707 PetscCall(MatMult(matis->A,matis->x,matis->y)); 2708 PetscCall(VecPointwiseMult(matis->y,work[1],matis->y)); 2709 PetscCall(VecGetArray(matis->y,&array)); 2710 for (j=0;j<n_interior_dofs;j++) { 2711 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2712 valid = PETSC_FALSE; 2713 break; 2714 } 2715 } 2716 PetscCall(VecRestoreArray(matis->y,&array)); 2717 } 2718 if (valid && nneu) { 2719 const PetscInt *idxs; 2720 PetscInt nzb; 2721 2722 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 2723 PetscCall(ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL)); 2724 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 2725 if (nzb) valid = PETSC_FALSE; 2726 } 2727 if (valid && pressures) { 2728 IS t_pressure_subs,tmp; 2729 PetscInt i1,i2; 2730 2731 PetscCall(ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs)); 2732 PetscCall(ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp)); 2733 PetscCall(ISGetLocalSize(tmp,&i1)); 2734 PetscCall(ISGetLocalSize(t_zerodiag_subs,&i2)); 2735 if (i2 != i1) valid = PETSC_FALSE; 2736 PetscCall(ISDestroy(&t_pressure_subs)); 2737 PetscCall(ISDestroy(&tmp)); 2738 } 2739 if (valid) { 2740 PetscCall(ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n])); 2741 benign_n++; 2742 } else recompute_zerodiag = PETSC_TRUE; 2743 } 2744 PetscCall(ISDestroy(&t_zerodiag_subs)); 2745 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 2746 } 2747 } 2748 } else { /* there's just one subdomain (or zero if they have not been detected */ 2749 PetscBool valid = PETSC_TRUE; 2750 2751 if (nneu) valid = PETSC_FALSE; 2752 if (valid && pressures) { 2753 PetscCall(ISEqual(pressures,zerodiag,&valid)); 2754 } 2755 if (valid && checkb) { 2756 PetscCall(MatMult(matis->A,work[0],matis->x)); 2757 PetscCall(VecPointwiseMult(matis->x,work[1],matis->x)); 2758 PetscCall(VecGetArray(matis->x,&array)); 2759 for (j=0;j<n_interior_dofs;j++) { 2760 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2761 valid = PETSC_FALSE; 2762 break; 2763 } 2764 } 2765 PetscCall(VecRestoreArray(matis->x,&array)); 2766 } 2767 if (valid) { 2768 benign_n = 1; 2769 PetscCall(PetscMalloc1(benign_n,&zerodiag_subs)); 2770 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2771 zerodiag_subs[0] = zerodiag; 2772 } 2773 } 2774 if (checkb) { 2775 PetscCall(VecDestroyVecs(2,&work)); 2776 } 2777 } 2778 PetscCall(PetscFree(interior_dofs)); 2779 2780 if (!benign_n) { 2781 PetscInt n; 2782 2783 PetscCall(ISDestroy(&zerodiag)); 2784 recompute_zerodiag = PETSC_FALSE; 2785 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2786 if (n) have_null = PETSC_FALSE; 2787 } 2788 2789 /* final check for null pressures */ 2790 if (zerodiag && pressures) { 2791 PetscCall(ISEqual(pressures,zerodiag,&have_null)); 2792 } 2793 2794 if (recompute_zerodiag) { 2795 PetscCall(ISDestroy(&zerodiag)); 2796 if (benign_n == 1) { 2797 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 2798 zerodiag = zerodiag_subs[0]; 2799 } else { 2800 PetscInt i,nzn,*new_idxs; 2801 2802 nzn = 0; 2803 for (i=0;i<benign_n;i++) { 2804 PetscInt ns; 2805 PetscCall(ISGetLocalSize(zerodiag_subs[i],&ns)); 2806 nzn += ns; 2807 } 2808 PetscCall(PetscMalloc1(nzn,&new_idxs)); 2809 nzn = 0; 2810 for (i=0;i<benign_n;i++) { 2811 PetscInt ns,*idxs; 2812 PetscCall(ISGetLocalSize(zerodiag_subs[i],&ns)); 2813 PetscCall(ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs)); 2814 PetscCall(PetscArraycpy(new_idxs+nzn,idxs,ns)); 2815 PetscCall(ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs)); 2816 nzn += ns; 2817 } 2818 PetscCall(PetscSortInt(nzn,new_idxs)); 2819 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag)); 2820 } 2821 have_null = PETSC_FALSE; 2822 } 2823 2824 /* determines if the coarse solver will be singular or not */ 2825 PetscCallMPI(MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc))); 2826 2827 /* Prepare matrix to compute no-net-flux */ 2828 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2829 Mat A,loc_divudotp; 2830 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2831 IS row,col,isused = NULL; 2832 PetscInt M,N,n,st,n_isused; 2833 2834 if (pressures) { 2835 isused = pressures; 2836 } else { 2837 isused = zerodiag_save; 2838 } 2839 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL)); 2840 PetscCall(MatISGetLocalMat(pc->pmat,&A)); 2841 PetscCall(MatGetLocalSize(A,&n,NULL)); 2842 PetscCheckFalse(!isused && n,PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2843 n_isused = 0; 2844 if (isused) { 2845 PetscCall(ISGetLocalSize(isused,&n_isused)); 2846 } 2847 PetscCallMPI(MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 2848 st = st-n_isused; 2849 if (n) { 2850 const PetscInt *gidxs; 2851 2852 PetscCall(MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp)); 2853 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs)); 2854 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2855 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row)); 2856 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col)); 2857 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs)); 2858 } else { 2859 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp)); 2860 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row)); 2861 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col)); 2862 } 2863 PetscCall(MatGetSize(pc->pmat,NULL,&N)); 2864 PetscCall(ISGetSize(row,&M)); 2865 PetscCall(ISLocalToGlobalMappingCreateIS(row,&rl2g)); 2866 PetscCall(ISLocalToGlobalMappingCreateIS(col,&cl2g)); 2867 PetscCall(ISDestroy(&row)); 2868 PetscCall(ISDestroy(&col)); 2869 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp)); 2870 PetscCall(MatSetType(pcbddc->divudotp,MATIS)); 2871 PetscCall(MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N)); 2872 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g)); 2873 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 2874 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 2875 PetscCall(MatISSetLocalMat(pcbddc->divudotp,loc_divudotp)); 2876 PetscCall(MatDestroy(&loc_divudotp)); 2877 PetscCall(MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY)); 2878 PetscCall(MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY)); 2879 } 2880 PetscCall(ISDestroy(&zerodiag_save)); 2881 PetscCall(ISDestroy(&pressures)); 2882 if (bzerodiag) { 2883 PetscInt i; 2884 2885 for (i=0;i<bsp;i++) { 2886 PetscCall(ISDestroy(&bzerodiag[i])); 2887 } 2888 PetscCall(PetscFree(bzerodiag)); 2889 } 2890 pcbddc->benign_n = benign_n; 2891 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2892 2893 /* determines if the problem has subdomains with 0 pressure block */ 2894 have_null = (PetscBool)(!!pcbddc->benign_n); 2895 PetscCallMPI(MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 2896 2897 project_b0: 2898 PetscCall(MatGetLocalSize(pcbddc->local_mat,&n,NULL)); 2899 /* change of basis and p0 dofs */ 2900 if (pcbddc->benign_n) { 2901 PetscInt i,s,*nnz; 2902 2903 /* local change of basis for pressures */ 2904 PetscCall(MatDestroy(&pcbddc->benign_change)); 2905 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change)); 2906 PetscCall(MatSetType(pcbddc->benign_change,MATAIJ)); 2907 PetscCall(MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE)); 2908 PetscCall(PetscMalloc1(n,&nnz)); 2909 for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */ 2910 for (i=0;i<pcbddc->benign_n;i++) { 2911 const PetscInt *idxs; 2912 PetscInt nzs,j; 2913 2914 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs)); 2915 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs)); 2916 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2917 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2918 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs)); 2919 } 2920 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz)); 2921 PetscCall(MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 2922 PetscCall(PetscFree(nnz)); 2923 /* set identity by default */ 2924 for (i=0;i<n;i++) { 2925 PetscCall(MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES)); 2926 } 2927 PetscCall(PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0)); 2928 PetscCall(PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0)); 2929 /* set change on pressures */ 2930 for (s=0;s<pcbddc->benign_n;s++) { 2931 PetscScalar *array; 2932 const PetscInt *idxs; 2933 PetscInt nzs; 2934 2935 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs)); 2936 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs)); 2937 for (i=0;i<nzs-1;i++) { 2938 PetscScalar vals[2]; 2939 PetscInt cols[2]; 2940 2941 cols[0] = idxs[i]; 2942 cols[1] = idxs[nzs-1]; 2943 vals[0] = 1.; 2944 vals[1] = 1.; 2945 PetscCall(MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES)); 2946 } 2947 PetscCall(PetscMalloc1(nzs,&array)); 2948 for (i=0;i<nzs-1;i++) array[i] = -1.; 2949 array[nzs-1] = 1.; 2950 PetscCall(MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES)); 2951 /* store local idxs for p0 */ 2952 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2953 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs)); 2954 PetscCall(PetscFree(array)); 2955 } 2956 PetscCall(MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY)); 2957 PetscCall(MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY)); 2958 2959 /* project if needed */ 2960 if (pcbddc->benign_change_explicit) { 2961 Mat M; 2962 2963 PetscCall(MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M)); 2964 PetscCall(MatDestroy(&pcbddc->local_mat)); 2965 PetscCall(MatSeqAIJCompress(M,&pcbddc->local_mat)); 2966 PetscCall(MatDestroy(&M)); 2967 } 2968 /* store global idxs for p0 */ 2969 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx)); 2970 } 2971 *zerodiaglocal = zerodiag; 2972 PetscFunctionReturn(0); 2973 } 2974 2975 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2976 { 2977 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2978 PetscScalar *array; 2979 2980 PetscFunctionBegin; 2981 if (!pcbddc->benign_sf) { 2982 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf)); 2983 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx)); 2984 } 2985 if (get) { 2986 PetscCall(VecGetArrayRead(v,(const PetscScalar**)&array)); 2987 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE)); 2988 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE)); 2989 PetscCall(VecRestoreArrayRead(v,(const PetscScalar**)&array)); 2990 } else { 2991 PetscCall(VecGetArray(v,&array)); 2992 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE)); 2993 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE)); 2994 PetscCall(VecRestoreArray(v,&array)); 2995 } 2996 PetscFunctionReturn(0); 2997 } 2998 2999 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3000 { 3001 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3002 3003 PetscFunctionBegin; 3004 /* TODO: add error checking 3005 - avoid nested pop (or push) calls. 3006 - cannot push before pop. 3007 - cannot call this if pcbddc->local_mat is NULL 3008 */ 3009 if (!pcbddc->benign_n) { 3010 PetscFunctionReturn(0); 3011 } 3012 if (pop) { 3013 if (pcbddc->benign_change_explicit) { 3014 IS is_p0; 3015 MatReuse reuse; 3016 3017 /* extract B_0 */ 3018 reuse = MAT_INITIAL_MATRIX; 3019 if (pcbddc->benign_B0) { 3020 reuse = MAT_REUSE_MATRIX; 3021 } 3022 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0)); 3023 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0)); 3024 /* remove rows and cols from local problem */ 3025 PetscCall(MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE)); 3026 PetscCall(MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 3027 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL)); 3028 PetscCall(ISDestroy(&is_p0)); 3029 } else { 3030 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3031 PetscScalar *vals; 3032 PetscInt i,n,*idxs_ins; 3033 3034 PetscCall(VecGetLocalSize(matis->y,&n)); 3035 PetscCall(PetscMalloc2(n,&idxs_ins,n,&vals)); 3036 if (!pcbddc->benign_B0) { 3037 PetscInt *nnz; 3038 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0)); 3039 PetscCall(MatSetType(pcbddc->benign_B0,MATAIJ)); 3040 PetscCall(MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE)); 3041 PetscCall(PetscMalloc1(pcbddc->benign_n,&nnz)); 3042 for (i=0;i<pcbddc->benign_n;i++) { 3043 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i])); 3044 nnz[i] = n - nnz[i]; 3045 } 3046 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz)); 3047 PetscCall(MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 3048 PetscCall(PetscFree(nnz)); 3049 } 3050 3051 for (i=0;i<pcbddc->benign_n;i++) { 3052 PetscScalar *array; 3053 PetscInt *idxs,j,nz,cum; 3054 3055 PetscCall(VecSet(matis->x,0.)); 3056 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz)); 3057 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs)); 3058 for (j=0;j<nz;j++) vals[j] = 1.; 3059 PetscCall(VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES)); 3060 PetscCall(VecAssemblyBegin(matis->x)); 3061 PetscCall(VecAssemblyEnd(matis->x)); 3062 PetscCall(VecSet(matis->y,0.)); 3063 PetscCall(MatMult(matis->A,matis->x,matis->y)); 3064 PetscCall(VecGetArray(matis->y,&array)); 3065 cum = 0; 3066 for (j=0;j<n;j++) { 3067 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3068 vals[cum] = array[j]; 3069 idxs_ins[cum] = j; 3070 cum++; 3071 } 3072 } 3073 PetscCall(MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES)); 3074 PetscCall(VecRestoreArray(matis->y,&array)); 3075 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs)); 3076 } 3077 PetscCall(MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY)); 3078 PetscCall(MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY)); 3079 PetscCall(PetscFree2(idxs_ins,vals)); 3080 } 3081 } else { /* push */ 3082 if (pcbddc->benign_change_explicit) { 3083 PetscInt i; 3084 3085 for (i=0;i<pcbddc->benign_n;i++) { 3086 PetscScalar *B0_vals; 3087 PetscInt *B0_cols,B0_ncol; 3088 3089 PetscCall(MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals)); 3090 PetscCall(MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES)); 3091 PetscCall(MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES)); 3092 PetscCall(MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES)); 3093 PetscCall(MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals)); 3094 } 3095 PetscCall(MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY)); 3096 PetscCall(MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY)); 3097 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!"); 3098 } 3099 PetscFunctionReturn(0); 3100 } 3101 3102 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3103 { 3104 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3105 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3106 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3107 PetscBLASInt *B_iwork,*B_ifail; 3108 PetscScalar *work,lwork; 3109 PetscScalar *St,*S,*eigv; 3110 PetscScalar *Sarray,*Starray; 3111 PetscReal *eigs,thresh,lthresh,uthresh; 3112 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3113 PetscBool allocated_S_St; 3114 #if defined(PETSC_USE_COMPLEX) 3115 PetscReal *rwork; 3116 #endif 3117 PetscErrorCode ierr; 3118 3119 PetscFunctionBegin; 3120 PetscCheck(sub_schurs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3121 PetscCheck(sub_schurs->schur_explicit,PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3122 PetscCheckFalse(sub_schurs->n_subs && (!sub_schurs->is_symmetric),PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3123 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0)); 3124 3125 if (pcbddc->dbg_flag) { 3126 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3127 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 3128 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n")); 3129 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3130 } 3131 3132 if (pcbddc->dbg_flag) { 3133 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef)); 3134 } 3135 3136 /* max size of subsets */ 3137 mss = 0; 3138 for (i=0;i<sub_schurs->n_subs;i++) { 3139 PetscInt subset_size; 3140 3141 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3142 mss = PetscMax(mss,subset_size); 3143 } 3144 3145 /* min/max and threshold */ 3146 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3147 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3148 nmax = PetscMax(nmin,nmax); 3149 allocated_S_St = PETSC_FALSE; 3150 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3151 allocated_S_St = PETSC_TRUE; 3152 } 3153 3154 /* allocate lapack workspace */ 3155 cum = cum2 = 0; 3156 maxneigs = 0; 3157 for (i=0;i<sub_schurs->n_subs;i++) { 3158 PetscInt n,subset_size; 3159 3160 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3161 n = PetscMin(subset_size,nmax); 3162 cum += subset_size; 3163 cum2 += subset_size*n; 3164 maxneigs = PetscMax(maxneigs,n); 3165 } 3166 lwork = 0; 3167 if (mss) { 3168 if (sub_schurs->is_symmetric) { 3169 PetscScalar sdummy = 0.; 3170 PetscBLASInt B_itype = 1; 3171 PetscBLASInt B_N = mss, idummy = 0; 3172 PetscReal rdummy = 0.,zero = 0.0; 3173 PetscReal eps = 0.0; /* dlamch? */ 3174 3175 B_lwork = -1; 3176 /* some implementations may complain about NULL pointers, even if we are querying */ 3177 S = &sdummy; 3178 St = &sdummy; 3179 eigs = &rdummy; 3180 eigv = &sdummy; 3181 B_iwork = &idummy; 3182 B_ifail = &idummy; 3183 #if defined(PETSC_USE_COMPLEX) 3184 rwork = &rdummy; 3185 #endif 3186 thresh = 1.0; 3187 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3188 #if defined(PETSC_USE_COMPLEX) 3189 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3190 #else 3191 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3192 #endif 3193 PetscCheckFalse(B_ierr != 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3194 PetscCall(PetscFPTrapPop()); 3195 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3196 } 3197 3198 nv = 0; 3199 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3200 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&nv)); 3201 } 3202 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork)); 3203 if (allocated_S_St) { 3204 PetscCall(PetscMalloc2(mss*mss,&S,mss*mss,&St)); 3205 } 3206 PetscCall(PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail)); 3207 #if defined(PETSC_USE_COMPLEX) 3208 PetscCall(PetscMalloc1(7*mss,&rwork)); 3209 #endif 3210 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3211 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3212 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3213 nv+cum,&pcbddc->adaptive_constraints_idxs, 3214 nv+cum2,&pcbddc->adaptive_constraints_data);PetscCall(ierr); 3215 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs)); 3216 3217 maxneigs = 0; 3218 cum = cumarray = 0; 3219 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3220 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3221 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3222 const PetscInt *idxs; 3223 3224 PetscCall(ISGetIndices(sub_schurs->is_vertices,&idxs)); 3225 for (cum=0;cum<nv;cum++) { 3226 pcbddc->adaptive_constraints_n[cum] = 1; 3227 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3228 pcbddc->adaptive_constraints_data[cum] = 1.0; 3229 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3230 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3231 } 3232 PetscCall(ISRestoreIndices(sub_schurs->is_vertices,&idxs)); 3233 } 3234 3235 if (mss) { /* multilevel */ 3236 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray)); 3237 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3238 } 3239 3240 lthresh = pcbddc->adaptive_threshold[0]; 3241 uthresh = pcbddc->adaptive_threshold[1]; 3242 for (i=0;i<sub_schurs->n_subs;i++) { 3243 const PetscInt *idxs; 3244 PetscReal upper,lower; 3245 PetscInt j,subset_size,eigs_start = 0; 3246 PetscBLASInt B_N; 3247 PetscBool same_data = PETSC_FALSE; 3248 PetscBool scal = PETSC_FALSE; 3249 3250 if (pcbddc->use_deluxe_scaling) { 3251 upper = PETSC_MAX_REAL; 3252 lower = uthresh; 3253 } else { 3254 PetscCheck(sub_schurs->is_posdef,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3255 upper = 1./uthresh; 3256 lower = 0.; 3257 } 3258 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 3259 PetscCall(ISGetIndices(sub_schurs->is_subs[i],&idxs)); 3260 PetscCall(PetscBLASIntCast(subset_size,&B_N)); 3261 /* this is experimental: we assume the dofs have been properly grouped to have 3262 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3263 if (!sub_schurs->is_posdef) { 3264 Mat T; 3265 3266 for (j=0;j<subset_size;j++) { 3267 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3268 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T)); 3269 PetscCall(MatScale(T,-1.0)); 3270 PetscCall(MatDestroy(&T)); 3271 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T)); 3272 PetscCall(MatScale(T,-1.0)); 3273 PetscCall(MatDestroy(&T)); 3274 if (sub_schurs->change_primal_sub) { 3275 PetscInt nz,k; 3276 const PetscInt *idxs; 3277 3278 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz)); 3279 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i],&idxs)); 3280 for (k=0;k<nz;k++) { 3281 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3282 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3283 } 3284 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs)); 3285 } 3286 scal = PETSC_TRUE; 3287 break; 3288 } 3289 } 3290 } 3291 3292 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3293 if (sub_schurs->is_symmetric) { 3294 PetscInt j,k; 3295 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3296 PetscCall(PetscArrayzero(S,subset_size*subset_size)); 3297 PetscCall(PetscArrayzero(St,subset_size*subset_size)); 3298 } 3299 for (j=0;j<subset_size;j++) { 3300 for (k=j;k<subset_size;k++) { 3301 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3302 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3303 } 3304 } 3305 } else { 3306 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3307 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3308 } 3309 } else { 3310 S = Sarray + cumarray; 3311 St = Starray + cumarray; 3312 } 3313 /* see if we can save some work */ 3314 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3315 PetscCall(PetscArraycmp(S,St,subset_size*subset_size,&same_data)); 3316 } 3317 3318 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3319 B_neigs = 0; 3320 } else { 3321 if (sub_schurs->is_symmetric) { 3322 PetscBLASInt B_itype = 1; 3323 PetscBLASInt B_IL, B_IU; 3324 PetscReal eps = -1.0; /* dlamch? */ 3325 PetscInt nmin_s; 3326 PetscBool compute_range; 3327 3328 B_neigs = 0; 3329 compute_range = (PetscBool)!same_data; 3330 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3331 3332 if (pcbddc->dbg_flag) { 3333 PetscInt nc = 0; 3334 3335 if (sub_schurs->change_primal_sub) { 3336 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc)); 3337 } 3338 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc)); 3339 } 3340 3341 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3342 if (compute_range) { 3343 3344 /* ask for eigenvalues larger than thresh */ 3345 if (sub_schurs->is_posdef) { 3346 #if defined(PETSC_USE_COMPLEX) 3347 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3348 #else 3349 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3350 #endif 3351 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3352 } else { /* no theory so far, but it works nicely */ 3353 PetscInt recipe = 0,recipe_m = 1; 3354 PetscReal bb[2]; 3355 3356 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL)); 3357 switch (recipe) { 3358 case 0: 3359 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3360 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3361 #if defined(PETSC_USE_COMPLEX) 3362 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3363 #else 3364 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3365 #endif 3366 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3367 break; 3368 case 1: 3369 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3370 #if defined(PETSC_USE_COMPLEX) 3371 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3372 #else 3373 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3374 #endif 3375 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3376 if (!scal) { 3377 PetscBLASInt B_neigs2 = 0; 3378 3379 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3380 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3381 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3382 #if defined(PETSC_USE_COMPLEX) 3383 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3384 #else 3385 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3386 #endif 3387 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3388 B_neigs += B_neigs2; 3389 } 3390 break; 3391 case 2: 3392 if (scal) { 3393 bb[0] = PETSC_MIN_REAL; 3394 bb[1] = 0; 3395 #if defined(PETSC_USE_COMPLEX) 3396 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3397 #else 3398 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3399 #endif 3400 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3401 } else { 3402 PetscBLASInt B_neigs2 = 0; 3403 PetscBool import = PETSC_FALSE; 3404 3405 lthresh = PetscMax(lthresh,0.0); 3406 if (lthresh > 0.0) { 3407 bb[0] = PETSC_MIN_REAL; 3408 bb[1] = lthresh*lthresh; 3409 3410 import = PETSC_TRUE; 3411 #if defined(PETSC_USE_COMPLEX) 3412 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3413 #else 3414 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3415 #endif 3416 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3417 } 3418 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3419 bb[1] = PETSC_MAX_REAL; 3420 if (import) { 3421 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3422 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3423 } 3424 #if defined(PETSC_USE_COMPLEX) 3425 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3426 #else 3427 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3428 #endif 3429 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3430 B_neigs += B_neigs2; 3431 } 3432 break; 3433 case 3: 3434 if (scal) { 3435 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL)); 3436 } else { 3437 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL)); 3438 } 3439 if (!scal) { 3440 bb[0] = uthresh; 3441 bb[1] = PETSC_MAX_REAL; 3442 #if defined(PETSC_USE_COMPLEX) 3443 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3444 #else 3445 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3446 #endif 3447 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3448 } 3449 if (recipe_m > 0 && B_N - B_neigs > 0) { 3450 PetscBLASInt B_neigs2 = 0; 3451 3452 B_IL = 1; 3453 PetscCall(PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU)); 3454 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3455 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3456 #if defined(PETSC_USE_COMPLEX) 3457 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3458 #else 3459 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3460 #endif 3461 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3462 B_neigs += B_neigs2; 3463 } 3464 break; 3465 case 4: 3466 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3467 #if defined(PETSC_USE_COMPLEX) 3468 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3469 #else 3470 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3471 #endif 3472 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3473 { 3474 PetscBLASInt B_neigs2 = 0; 3475 3476 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3477 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3478 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3479 #if defined(PETSC_USE_COMPLEX) 3480 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3481 #else 3482 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3483 #endif 3484 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3485 B_neigs += B_neigs2; 3486 } 3487 break; 3488 case 5: /* same as before: first compute all eigenvalues, then filter */ 3489 #if defined(PETSC_USE_COMPLEX) 3490 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3491 #else 3492 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3493 #endif 3494 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3495 { 3496 PetscInt e,k,ne; 3497 for (e=0,ne=0;e<B_neigs;e++) { 3498 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3499 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3500 eigs[ne] = eigs[e]; 3501 ne++; 3502 } 3503 } 3504 PetscCall(PetscArraycpy(eigv,S,B_N*ne)); 3505 B_neigs = ne; 3506 } 3507 break; 3508 default: 3509 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3510 } 3511 } 3512 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3513 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3514 B_IL = 1; 3515 #if defined(PETSC_USE_COMPLEX) 3516 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3517 #else 3518 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3519 #endif 3520 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3521 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3522 PetscInt k; 3523 PetscCheck(sub_schurs->change_primal_sub,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3524 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax)); 3525 PetscCall(PetscBLASIntCast(nmax,&B_neigs)); 3526 nmin = nmax; 3527 PetscCall(PetscArrayzero(eigv,subset_size*nmax)); 3528 for (k=0;k<nmax;k++) { 3529 eigs[k] = 1./PETSC_SMALL; 3530 eigv[k*(subset_size+1)] = 1.0; 3531 } 3532 } 3533 PetscCall(PetscFPTrapPop()); 3534 if (B_ierr) { 3535 PetscCheckFalse(B_ierr < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3536 else PetscCheckFalse(B_ierr <= B_N,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3537 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3538 } 3539 3540 if (B_neigs > nmax) { 3541 if (pcbddc->dbg_flag) { 3542 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax)); 3543 } 3544 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3545 B_neigs = nmax; 3546 } 3547 3548 nmin_s = PetscMin(nmin,B_N); 3549 if (B_neigs < nmin_s) { 3550 PetscBLASInt B_neigs2 = 0; 3551 3552 if (pcbddc->use_deluxe_scaling) { 3553 if (scal) { 3554 B_IU = nmin_s; 3555 B_IL = B_neigs + 1; 3556 } else { 3557 B_IL = B_N - nmin_s + 1; 3558 B_IU = B_N - B_neigs; 3559 } 3560 } else { 3561 B_IL = B_neigs + 1; 3562 B_IU = nmin_s; 3563 } 3564 if (pcbddc->dbg_flag) { 3565 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU)); 3566 } 3567 if (sub_schurs->is_symmetric) { 3568 PetscInt j,k; 3569 for (j=0;j<subset_size;j++) { 3570 for (k=j;k<subset_size;k++) { 3571 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3572 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3573 } 3574 } 3575 } else { 3576 PetscCall(PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size)); 3577 PetscCall(PetscArraycpy(St,Starray+cumarray,subset_size*subset_size)); 3578 } 3579 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3580 #if defined(PETSC_USE_COMPLEX) 3581 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3582 #else 3583 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3584 #endif 3585 PetscCall(PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0)); 3586 PetscCall(PetscFPTrapPop()); 3587 B_neigs += B_neigs2; 3588 } 3589 if (B_ierr) { 3590 PetscCheckFalse(B_ierr < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3591 else PetscCheckFalse(B_ierr <= B_N,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3592 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3593 } 3594 if (pcbddc->dbg_flag) { 3595 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs)); 3596 for (j=0;j<B_neigs;j++) { 3597 if (eigs[j] == 0.0) { 3598 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n")); 3599 } else { 3600 if (pcbddc->use_deluxe_scaling) { 3601 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start])); 3602 } else { 3603 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start])); 3604 } 3605 } 3606 } 3607 } 3608 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3609 } 3610 /* change the basis back to the original one */ 3611 if (sub_schurs->change) { 3612 Mat change,phi,phit; 3613 3614 if (pcbddc->dbg_flag > 2) { 3615 PetscInt ii; 3616 for (ii=0;ii<B_neigs;ii++) { 3617 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N)); 3618 for (j=0;j<B_N;j++) { 3619 #if defined(PETSC_USE_COMPLEX) 3620 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3621 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3622 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c)); 3623 #else 3624 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j])); 3625 #endif 3626 } 3627 } 3628 } 3629 PetscCall(KSPGetOperators(sub_schurs->change[i],&change,NULL)); 3630 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit)); 3631 PetscCall(MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi)); 3632 PetscCall(MatCopy(phi,phit,SAME_NONZERO_PATTERN)); 3633 PetscCall(MatDestroy(&phit)); 3634 PetscCall(MatDestroy(&phi)); 3635 } 3636 maxneigs = PetscMax(B_neigs,maxneigs); 3637 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3638 if (B_neigs) { 3639 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size)); 3640 3641 if (pcbddc->dbg_flag > 1) { 3642 PetscInt ii; 3643 for (ii=0;ii<B_neigs;ii++) { 3644 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N)); 3645 for (j=0;j<B_N;j++) { 3646 #if defined(PETSC_USE_COMPLEX) 3647 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3648 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3649 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c)); 3650 #else 3651 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]])); 3652 #endif 3653 } 3654 } 3655 } 3656 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size)); 3657 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3658 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3659 cum++; 3660 } 3661 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i],&idxs)); 3662 /* shift for next computation */ 3663 cumarray += subset_size*subset_size; 3664 } 3665 if (pcbddc->dbg_flag) { 3666 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3667 } 3668 3669 if (mss) { 3670 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray)); 3671 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray)); 3672 /* destroy matrices (junk) */ 3673 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3674 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3675 } 3676 if (allocated_S_St) { 3677 PetscCall(PetscFree2(S,St)); 3678 } 3679 PetscCall(PetscFree5(eigv,eigs,work,B_iwork,B_ifail)); 3680 #if defined(PETSC_USE_COMPLEX) 3681 PetscCall(PetscFree(rwork)); 3682 #endif 3683 if (pcbddc->dbg_flag) { 3684 PetscInt maxneigs_r; 3685 PetscCallMPI(MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc))); 3686 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r)); 3687 } 3688 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0)); 3689 PetscFunctionReturn(0); 3690 } 3691 3692 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3693 { 3694 PetscScalar *coarse_submat_vals; 3695 3696 PetscFunctionBegin; 3697 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3698 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3699 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3700 3701 /* Setup local neumann solver ksp_R */ 3702 /* PCBDDCSetUpLocalScatters should be called first! */ 3703 PetscCall(PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE)); 3704 3705 /* 3706 Setup local correction and local part of coarse basis. 3707 Gives back the dense local part of the coarse matrix in column major ordering 3708 */ 3709 PetscCall(PCBDDCSetUpCorrection(pc,&coarse_submat_vals)); 3710 3711 /* Compute total number of coarse nodes and setup coarse solver */ 3712 PetscCall(PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals)); 3713 3714 /* free */ 3715 PetscCall(PetscFree(coarse_submat_vals)); 3716 PetscFunctionReturn(0); 3717 } 3718 3719 PetscErrorCode PCBDDCResetCustomization(PC pc) 3720 { 3721 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3722 3723 PetscFunctionBegin; 3724 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3725 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3726 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3727 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3728 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3729 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3730 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3731 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3732 PetscCall(PCBDDCSetDofsSplitting(pc,0,NULL)); 3733 PetscCall(PCBDDCSetDofsSplittingLocal(pc,0,NULL)); 3734 PetscFunctionReturn(0); 3735 } 3736 3737 PetscErrorCode PCBDDCResetTopography(PC pc) 3738 { 3739 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3740 PetscInt i; 3741 3742 PetscFunctionBegin; 3743 PetscCall(MatDestroy(&pcbddc->nedcG)); 3744 PetscCall(ISDestroy(&pcbddc->nedclocal)); 3745 PetscCall(MatDestroy(&pcbddc->discretegradient)); 3746 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 3747 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 3748 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 3749 PetscCall(VecDestroy(&pcbddc->work_change)); 3750 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 3751 PetscCall(MatDestroy(&pcbddc->divudotp)); 3752 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 3753 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 3754 for (i=0;i<pcbddc->n_local_subs;i++) { 3755 PetscCall(ISDestroy(&pcbddc->local_subs[i])); 3756 } 3757 pcbddc->n_local_subs = 0; 3758 PetscCall(PetscFree(pcbddc->local_subs)); 3759 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 3760 pcbddc->graphanalyzed = PETSC_FALSE; 3761 pcbddc->recompute_topography = PETSC_TRUE; 3762 pcbddc->corner_selected = PETSC_FALSE; 3763 PetscFunctionReturn(0); 3764 } 3765 3766 PetscErrorCode PCBDDCResetSolvers(PC pc) 3767 { 3768 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3769 3770 PetscFunctionBegin; 3771 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 3772 if (pcbddc->coarse_phi_B) { 3773 PetscScalar *array; 3774 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&array)); 3775 PetscCall(PetscFree(array)); 3776 } 3777 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 3778 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 3779 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 3780 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 3781 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3782 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3783 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3784 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3785 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3786 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3787 PetscCall(ISDestroy(&pcbddc->is_R_local)); 3788 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 3789 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 3790 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 3791 PetscCall(KSPReset(pcbddc->ksp_D)); 3792 PetscCall(KSPReset(pcbddc->ksp_R)); 3793 PetscCall(KSPReset(pcbddc->coarse_ksp)); 3794 PetscCall(MatDestroy(&pcbddc->local_mat)); 3795 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 3796 PetscCall(PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult)); 3797 PetscCall(PetscFree(pcbddc->global_primal_indices)); 3798 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 3799 PetscCall(MatDestroy(&pcbddc->benign_change)); 3800 PetscCall(VecDestroy(&pcbddc->benign_vec)); 3801 PetscCall(PCBDDCBenignShellMat(pc,PETSC_TRUE)); 3802 PetscCall(MatDestroy(&pcbddc->benign_B0)); 3803 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 3804 if (pcbddc->benign_zerodiag_subs) { 3805 PetscInt i; 3806 for (i=0;i<pcbddc->benign_n;i++) { 3807 PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 3808 } 3809 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 3810 } 3811 PetscCall(PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0)); 3812 PetscFunctionReturn(0); 3813 } 3814 3815 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3816 { 3817 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3818 PC_IS *pcis = (PC_IS*)pc->data; 3819 VecType impVecType; 3820 PetscInt n_constraints,n_R,old_size; 3821 3822 PetscFunctionBegin; 3823 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3824 n_R = pcis->n - pcbddc->n_vertices; 3825 PetscCall(VecGetType(pcis->vec1_N,&impVecType)); 3826 /* local work vectors (try to avoid unneeded work)*/ 3827 /* R nodes */ 3828 old_size = -1; 3829 if (pcbddc->vec1_R) { 3830 PetscCall(VecGetSize(pcbddc->vec1_R,&old_size)); 3831 } 3832 if (n_R != old_size) { 3833 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3834 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3835 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R)); 3836 PetscCall(VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R)); 3837 PetscCall(VecSetType(pcbddc->vec1_R,impVecType)); 3838 PetscCall(VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R)); 3839 } 3840 /* local primal dofs */ 3841 old_size = -1; 3842 if (pcbddc->vec1_P) { 3843 PetscCall(VecGetSize(pcbddc->vec1_P,&old_size)); 3844 } 3845 if (pcbddc->local_primal_size != old_size) { 3846 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3847 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P)); 3848 PetscCall(VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size)); 3849 PetscCall(VecSetType(pcbddc->vec1_P,impVecType)); 3850 } 3851 /* local explicit constraints */ 3852 old_size = -1; 3853 if (pcbddc->vec1_C) { 3854 PetscCall(VecGetSize(pcbddc->vec1_C,&old_size)); 3855 } 3856 if (n_constraints && n_constraints != old_size) { 3857 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3858 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C)); 3859 PetscCall(VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints)); 3860 PetscCall(VecSetType(pcbddc->vec1_C,impVecType)); 3861 } 3862 PetscFunctionReturn(0); 3863 } 3864 3865 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3866 { 3867 /* pointers to pcis and pcbddc */ 3868 PC_IS* pcis = (PC_IS*)pc->data; 3869 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3870 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3871 /* submatrices of local problem */ 3872 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3873 /* submatrices of local coarse problem */ 3874 Mat S_VV,S_CV,S_VC,S_CC; 3875 /* working matrices */ 3876 Mat C_CR; 3877 /* additional working stuff */ 3878 PC pc_R; 3879 Mat F,Brhs = NULL; 3880 Vec dummy_vec; 3881 PetscBool isLU,isCHOL,need_benign_correction,sparserhs; 3882 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3883 PetscScalar *work; 3884 PetscInt *idx_V_B; 3885 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3886 PetscInt i,n_R,n_D,n_B; 3887 PetscScalar one=1.0,m_one=-1.0; 3888 3889 PetscFunctionBegin; 3890 PetscCheckFalse(!pcbddc->symmetric_primal && pcbddc->benign_n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3891 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0)); 3892 3893 /* Set Non-overlapping dimensions */ 3894 n_vertices = pcbddc->n_vertices; 3895 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3896 n_B = pcis->n_B; 3897 n_D = pcis->n - n_B; 3898 n_R = pcis->n - n_vertices; 3899 3900 /* vertices in boundary numbering */ 3901 PetscCall(PetscMalloc1(n_vertices,&idx_V_B)); 3902 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B)); 3903 PetscCheckFalse(i != n_vertices,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i); 3904 3905 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3906 PetscCall(PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals)); 3907 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV)); 3908 PetscCall(MatDenseSetLDA(S_VV,pcbddc->local_primal_size)); 3909 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV)); 3910 PetscCall(MatDenseSetLDA(S_CV,pcbddc->local_primal_size)); 3911 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC)); 3912 PetscCall(MatDenseSetLDA(S_VC,pcbddc->local_primal_size)); 3913 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC)); 3914 PetscCall(MatDenseSetLDA(S_CC,pcbddc->local_primal_size)); 3915 3916 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3917 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_R)); 3918 PetscCall(PCSetUp(pc_R)); 3919 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU)); 3920 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL)); 3921 lda_rhs = n_R; 3922 need_benign_correction = PETSC_FALSE; 3923 if (isLU || isCHOL) { 3924 PetscCall(PCFactorGetMatrix(pc_R,&F)); 3925 } else if (sub_schurs && sub_schurs->reuse_solver) { 3926 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3927 MatFactorType type; 3928 3929 F = reuse_solver->F; 3930 PetscCall(MatGetFactorType(F,&type)); 3931 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3932 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3933 PetscCall(MatGetSize(F,&lda_rhs,NULL)); 3934 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3935 } else F = NULL; 3936 3937 /* determine if we can use a sparse right-hand side */ 3938 sparserhs = PETSC_FALSE; 3939 if (F) { 3940 MatSolverType solver; 3941 3942 PetscCall(MatFactorGetSolverType(F,&solver)); 3943 PetscCall(PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs)); 3944 } 3945 3946 /* allocate workspace */ 3947 n = 0; 3948 if (n_constraints) { 3949 n += lda_rhs*n_constraints; 3950 } 3951 if (n_vertices) { 3952 n = PetscMax(2*lda_rhs*n_vertices,n); 3953 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3954 } 3955 if (!pcbddc->symmetric_primal) { 3956 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3957 } 3958 PetscCall(PetscMalloc1(n,&work)); 3959 3960 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3961 dummy_vec = NULL; 3962 if (need_benign_correction && lda_rhs != n_R && F) { 3963 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec)); 3964 PetscCall(VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE)); 3965 PetscCall(VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name)); 3966 } 3967 3968 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3969 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3970 3971 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3972 if (n_constraints) { 3973 Mat M3,C_B; 3974 IS is_aux; 3975 3976 /* Extract constraints on R nodes: C_{CR} */ 3977 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux)); 3978 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR)); 3979 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B)); 3980 3981 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3982 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3983 if (!sparserhs) { 3984 PetscCall(PetscArrayzero(work,lda_rhs*n_constraints)); 3985 for (i=0;i<n_constraints;i++) { 3986 const PetscScalar *row_cmat_values; 3987 const PetscInt *row_cmat_indices; 3988 PetscInt size_of_constraint,j; 3989 3990 PetscCall(MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values)); 3991 for (j=0;j<size_of_constraint;j++) { 3992 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3993 } 3994 PetscCall(MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values)); 3995 } 3996 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs)); 3997 } else { 3998 Mat tC_CR; 3999 4000 PetscCall(MatScale(C_CR,-1.0)); 4001 if (lda_rhs != n_R) { 4002 PetscScalar *aa; 4003 PetscInt r,*ii,*jj; 4004 PetscBool done; 4005 4006 PetscCall(MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4007 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4008 PetscCall(MatSeqAIJGetArray(C_CR,&aa)); 4009 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR)); 4010 PetscCall(MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4011 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4012 } else { 4013 PetscCall(PetscObjectReference((PetscObject)C_CR)); 4014 tC_CR = C_CR; 4015 } 4016 PetscCall(MatCreateTranspose(tC_CR,&Brhs)); 4017 PetscCall(MatDestroy(&tC_CR)); 4018 } 4019 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R)); 4020 if (F) { 4021 if (need_benign_correction) { 4022 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4023 4024 /* rhs is already zero on interior dofs, no need to change the rhs */ 4025 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n)); 4026 } 4027 PetscCall(MatMatSolve(F,Brhs,local_auxmat2_R)); 4028 if (need_benign_correction) { 4029 PetscScalar *marr; 4030 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4031 4032 PetscCall(MatDenseGetArray(local_auxmat2_R,&marr)); 4033 if (lda_rhs != n_R) { 4034 for (i=0;i<n_constraints;i++) { 4035 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4036 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE)); 4037 PetscCall(VecResetArray(dummy_vec)); 4038 } 4039 } else { 4040 for (i=0;i<n_constraints;i++) { 4041 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4042 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE)); 4043 PetscCall(VecResetArray(pcbddc->vec1_R)); 4044 } 4045 } 4046 PetscCall(MatDenseRestoreArray(local_auxmat2_R,&marr)); 4047 } 4048 } else { 4049 PetscScalar *marr; 4050 4051 PetscCall(MatDenseGetArray(local_auxmat2_R,&marr)); 4052 for (i=0;i<n_constraints;i++) { 4053 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs)); 4054 PetscCall(VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs)); 4055 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4056 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4057 PetscCall(VecResetArray(pcbddc->vec1_R)); 4058 PetscCall(VecResetArray(pcbddc->vec2_R)); 4059 } 4060 PetscCall(MatDenseRestoreArray(local_auxmat2_R,&marr)); 4061 } 4062 if (sparserhs) { 4063 PetscCall(MatScale(C_CR,-1.0)); 4064 } 4065 PetscCall(MatDestroy(&Brhs)); 4066 if (!pcbddc->switch_static) { 4067 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2)); 4068 for (i=0;i<n_constraints;i++) { 4069 Vec r, b; 4070 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R,i,&r)); 4071 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2,i,&b)); 4072 PetscCall(VecScatterBegin(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD)); 4073 PetscCall(VecScatterEnd(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD)); 4074 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2,i,&b)); 4075 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R,i,&r)); 4076 } 4077 PetscCall(MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3)); 4078 } else { 4079 if (lda_rhs != n_R) { 4080 IS dummy; 4081 4082 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy)); 4083 PetscCall(MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2)); 4084 PetscCall(ISDestroy(&dummy)); 4085 } else { 4086 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 4087 pcbddc->local_auxmat2 = local_auxmat2_R; 4088 } 4089 PetscCall(MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3)); 4090 } 4091 PetscCall(ISDestroy(&is_aux)); 4092 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4093 PetscCall(MatScale(M3,m_one)); 4094 if (isCHOL) { 4095 PetscCall(MatCholeskyFactor(M3,NULL,NULL)); 4096 } else { 4097 PetscCall(MatLUFactor(M3,NULL,NULL,NULL)); 4098 } 4099 PetscCall(MatSeqDenseInvertFactors_Private(M3)); 4100 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4101 PetscCall(MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1)); 4102 PetscCall(MatDestroy(&C_B)); 4103 PetscCall(MatCopy(M3,S_CC,SAME_NONZERO_PATTERN)); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4104 PetscCall(MatDestroy(&M3)); 4105 } 4106 4107 /* Get submatrices from subdomain matrix */ 4108 if (n_vertices) { 4109 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4110 PetscBool oldpin; 4111 #endif 4112 PetscBool isaij; 4113 IS is_aux; 4114 4115 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4116 IS tis; 4117 4118 PetscCall(ISDuplicate(pcbddc->is_R_local,&tis)); 4119 PetscCall(ISSort(tis)); 4120 PetscCall(ISComplement(tis,0,pcis->n,&is_aux)); 4121 PetscCall(ISDestroy(&tis)); 4122 } else { 4123 PetscCall(ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux)); 4124 } 4125 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4126 oldpin = pcbddc->local_mat->boundtocpu; 4127 #endif 4128 PetscCall(MatBindToCPU(pcbddc->local_mat,PETSC_TRUE)); 4129 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV)); 4130 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR)); 4131 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij)); 4132 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4133 PetscCall(MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR)); 4134 } 4135 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV)); 4136 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4137 PetscCall(MatBindToCPU(pcbddc->local_mat,oldpin)); 4138 #endif 4139 PetscCall(ISDestroy(&is_aux)); 4140 } 4141 4142 /* Matrix of coarse basis functions (local) */ 4143 if (pcbddc->coarse_phi_B) { 4144 PetscInt on_B,on_primal,on_D=n_D; 4145 if (pcbddc->coarse_phi_D) { 4146 PetscCall(MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL)); 4147 } 4148 PetscCall(MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal)); 4149 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4150 PetscScalar *marray; 4151 4152 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&marray)); 4153 PetscCall(PetscFree(marray)); 4154 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4155 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4156 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4157 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4158 } 4159 } 4160 4161 if (!pcbddc->coarse_phi_B) { 4162 PetscScalar *marr; 4163 4164 /* memory size */ 4165 n = n_B*pcbddc->local_primal_size; 4166 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4167 if (!pcbddc->symmetric_primal) n *= 2; 4168 PetscCall(PetscCalloc1(n,&marr)); 4169 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B)); 4170 marr += n_B*pcbddc->local_primal_size; 4171 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4172 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D)); 4173 marr += n_D*pcbddc->local_primal_size; 4174 } 4175 if (!pcbddc->symmetric_primal) { 4176 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B)); 4177 marr += n_B*pcbddc->local_primal_size; 4178 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4179 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D)); 4180 } 4181 } else { 4182 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 4183 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4184 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4185 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 4186 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4187 } 4188 } 4189 } 4190 4191 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4192 p0_lidx_I = NULL; 4193 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4194 const PetscInt *idxs; 4195 4196 PetscCall(ISGetIndices(pcis->is_I_local,&idxs)); 4197 PetscCall(PetscMalloc1(pcbddc->benign_n,&p0_lidx_I)); 4198 for (i=0;i<pcbddc->benign_n;i++) { 4199 PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i])); 4200 } 4201 PetscCall(ISRestoreIndices(pcis->is_I_local,&idxs)); 4202 } 4203 4204 /* vertices */ 4205 if (n_vertices) { 4206 PetscBool restoreavr = PETSC_FALSE; 4207 4208 PetscCall(MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV)); 4209 4210 if (n_R) { 4211 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4212 PetscBLASInt B_N,B_one = 1; 4213 const PetscScalar *x; 4214 PetscScalar *y; 4215 4216 PetscCall(MatScale(A_RV,m_one)); 4217 if (need_benign_correction) { 4218 ISLocalToGlobalMapping RtoN; 4219 IS is_p0; 4220 PetscInt *idxs_p0,n; 4221 4222 PetscCall(PetscMalloc1(pcbddc->benign_n,&idxs_p0)); 4223 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN)); 4224 PetscCall(ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0)); 4225 PetscCheckFalse(n != pcbddc->benign_n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n); 4226 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4227 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0)); 4228 PetscCall(MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr)); 4229 PetscCall(ISDestroy(&is_p0)); 4230 } 4231 4232 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV)); 4233 if (!sparserhs || need_benign_correction) { 4234 if (lda_rhs == n_R) { 4235 PetscCall(MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV)); 4236 } else { 4237 PetscScalar *av,*array; 4238 const PetscInt *xadj,*adjncy; 4239 PetscInt n; 4240 PetscBool flg_row; 4241 4242 array = work+lda_rhs*n_vertices; 4243 PetscCall(PetscArrayzero(array,lda_rhs*n_vertices)); 4244 PetscCall(MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV)); 4245 PetscCall(MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4246 PetscCall(MatSeqAIJGetArray(A_RV,&av)); 4247 for (i=0;i<n;i++) { 4248 PetscInt j; 4249 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4250 } 4251 PetscCall(MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4252 PetscCall(MatDestroy(&A_RV)); 4253 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV)); 4254 } 4255 if (need_benign_correction) { 4256 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4257 PetscScalar *marr; 4258 4259 PetscCall(MatDenseGetArray(A_RV,&marr)); 4260 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4261 4262 | 0 0 0 | (V) 4263 L = | 0 0 -1 | (P-p0) 4264 | 0 0 -1 | (p0) 4265 4266 */ 4267 for (i=0;i<reuse_solver->benign_n;i++) { 4268 const PetscScalar *vals; 4269 const PetscInt *idxs,*idxs_zero; 4270 PetscInt n,j,nz; 4271 4272 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz)); 4273 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4274 PetscCall(MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4275 for (j=0;j<n;j++) { 4276 PetscScalar val = vals[j]; 4277 PetscInt k,col = idxs[j]; 4278 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4279 } 4280 PetscCall(MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4281 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4282 } 4283 PetscCall(MatDenseRestoreArray(A_RV,&marr)); 4284 } 4285 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4286 Brhs = A_RV; 4287 } else { 4288 Mat tA_RVT,A_RVT; 4289 4290 if (!pcbddc->symmetric_primal) { 4291 /* A_RV already scaled by -1 */ 4292 PetscCall(MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT)); 4293 } else { 4294 restoreavr = PETSC_TRUE; 4295 PetscCall(MatScale(A_VR,-1.0)); 4296 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4297 A_RVT = A_VR; 4298 } 4299 if (lda_rhs != n_R) { 4300 PetscScalar *aa; 4301 PetscInt r,*ii,*jj; 4302 PetscBool done; 4303 4304 PetscCall(MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4305 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4306 PetscCall(MatSeqAIJGetArray(A_RVT,&aa)); 4307 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT)); 4308 PetscCall(MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done)); 4309 PetscCheck(done,PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4310 } else { 4311 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4312 tA_RVT = A_RVT; 4313 } 4314 PetscCall(MatCreateTranspose(tA_RVT,&Brhs)); 4315 PetscCall(MatDestroy(&tA_RVT)); 4316 PetscCall(MatDestroy(&A_RVT)); 4317 } 4318 if (F) { 4319 /* need to correct the rhs */ 4320 if (need_benign_correction) { 4321 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4322 PetscScalar *marr; 4323 4324 PetscCall(MatDenseGetArray(Brhs,&marr)); 4325 if (lda_rhs != n_R) { 4326 for (i=0;i<n_vertices;i++) { 4327 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4328 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE)); 4329 PetscCall(VecResetArray(dummy_vec)); 4330 } 4331 } else { 4332 for (i=0;i<n_vertices;i++) { 4333 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4334 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE)); 4335 PetscCall(VecResetArray(pcbddc->vec1_R)); 4336 } 4337 } 4338 PetscCall(MatDenseRestoreArray(Brhs,&marr)); 4339 } 4340 PetscCall(MatMatSolve(F,Brhs,A_RRmA_RV)); 4341 if (restoreavr) { 4342 PetscCall(MatScale(A_VR,-1.0)); 4343 } 4344 /* need to correct the solution */ 4345 if (need_benign_correction) { 4346 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4347 PetscScalar *marr; 4348 4349 PetscCall(MatDenseGetArray(A_RRmA_RV,&marr)); 4350 if (lda_rhs != n_R) { 4351 for (i=0;i<n_vertices;i++) { 4352 PetscCall(VecPlaceArray(dummy_vec,marr+i*lda_rhs)); 4353 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE)); 4354 PetscCall(VecResetArray(dummy_vec)); 4355 } 4356 } else { 4357 for (i=0;i<n_vertices;i++) { 4358 PetscCall(VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs)); 4359 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE)); 4360 PetscCall(VecResetArray(pcbddc->vec1_R)); 4361 } 4362 } 4363 PetscCall(MatDenseRestoreArray(A_RRmA_RV,&marr)); 4364 } 4365 } else { 4366 PetscCall(MatDenseGetArray(Brhs,&y)); 4367 for (i=0;i<n_vertices;i++) { 4368 PetscCall(VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs)); 4369 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs)); 4370 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4371 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4372 PetscCall(VecResetArray(pcbddc->vec1_R)); 4373 PetscCall(VecResetArray(pcbddc->vec2_R)); 4374 } 4375 PetscCall(MatDenseRestoreArray(Brhs,&y)); 4376 } 4377 PetscCall(MatDestroy(&A_RV)); 4378 PetscCall(MatDestroy(&Brhs)); 4379 /* S_VV and S_CV */ 4380 if (n_constraints) { 4381 Mat B; 4382 4383 PetscCall(PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices)); 4384 for (i=0;i<n_vertices;i++) { 4385 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs)); 4386 PetscCall(VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B)); 4387 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 4388 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 4389 PetscCall(VecResetArray(pcis->vec1_B)); 4390 PetscCall(VecResetArray(pcbddc->vec1_R)); 4391 } 4392 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B)); 4393 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4394 PetscCall(MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV)); 4395 PetscCall(MatProductSetType(S_CV,MATPRODUCT_AB)); 4396 PetscCall(MatProductSetFromOptions(S_CV)); 4397 PetscCall(MatProductSymbolic(S_CV)); 4398 PetscCall(MatProductNumeric(S_CV)); 4399 PetscCall(MatProductClear(S_CV)); 4400 4401 PetscCall(MatDestroy(&B)); 4402 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B)); 4403 /* Reuse B = local_auxmat2_R * S_CV */ 4404 PetscCall(MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B)); 4405 PetscCall(MatProductSetType(B,MATPRODUCT_AB)); 4406 PetscCall(MatProductSetFromOptions(B)); 4407 PetscCall(MatProductSymbolic(B)); 4408 PetscCall(MatProductNumeric(B)); 4409 4410 PetscCall(MatScale(S_CV,m_one)); 4411 PetscCall(PetscBLASIntCast(lda_rhs*n_vertices,&B_N)); 4412 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4413 PetscCall(MatDestroy(&B)); 4414 } 4415 if (lda_rhs != n_R) { 4416 PetscCall(MatDestroy(&A_RRmA_RV)); 4417 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV)); 4418 PetscCall(MatDenseSetLDA(A_RRmA_RV,lda_rhs)); 4419 } 4420 PetscCall(MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt)); 4421 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4422 if (need_benign_correction) { 4423 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4424 PetscScalar *marr,*sums; 4425 4426 PetscCall(PetscMalloc1(n_vertices,&sums)); 4427 PetscCall(MatDenseGetArray(S_VVt,&marr)); 4428 for (i=0;i<reuse_solver->benign_n;i++) { 4429 const PetscScalar *vals; 4430 const PetscInt *idxs,*idxs_zero; 4431 PetscInt n,j,nz; 4432 4433 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz)); 4434 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4435 for (j=0;j<n_vertices;j++) { 4436 PetscInt k; 4437 sums[j] = 0.; 4438 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4439 } 4440 PetscCall(MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4441 for (j=0;j<n;j++) { 4442 PetscScalar val = vals[j]; 4443 PetscInt k; 4444 for (k=0;k<n_vertices;k++) { 4445 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4446 } 4447 } 4448 PetscCall(MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals)); 4449 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero)); 4450 } 4451 PetscCall(PetscFree(sums)); 4452 PetscCall(MatDenseRestoreArray(S_VVt,&marr)); 4453 PetscCall(MatDestroy(&A_RV_bcorr)); 4454 } 4455 PetscCall(MatDestroy(&A_RRmA_RV)); 4456 PetscCall(PetscBLASIntCast(n_vertices*n_vertices,&B_N)); 4457 PetscCall(MatDenseGetArrayRead(A_VV,&x)); 4458 PetscCall(MatDenseGetArray(S_VVt,&y)); 4459 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4460 PetscCall(MatDenseRestoreArrayRead(A_VV,&x)); 4461 PetscCall(MatDenseRestoreArray(S_VVt,&y)); 4462 PetscCall(MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN)); 4463 PetscCall(MatDestroy(&S_VVt)); 4464 } else { 4465 PetscCall(MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN)); 4466 } 4467 PetscCall(MatDestroy(&A_VV)); 4468 4469 /* coarse basis functions */ 4470 for (i=0;i<n_vertices;i++) { 4471 Vec v; 4472 PetscScalar one = 1.0,zero = 0.0; 4473 4474 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i)); 4475 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B,i,&v)); 4476 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4477 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4478 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4479 PetscMPIInt rank; 4480 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),&rank)); 4481 PetscCheckFalse(rank > 1,PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),PETSC_ERR_PLIB,"Expected a sequential dense matrix"); 4482 } 4483 PetscCall(VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES)); 4484 PetscCall(VecAssemblyBegin(v)); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */ 4485 PetscCall(VecAssemblyEnd(v)); 4486 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i,&v)); 4487 4488 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4489 PetscInt j; 4490 4491 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D,i,&v)); 4492 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4493 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4494 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4495 PetscMPIInt rank; 4496 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),&rank)); 4497 PetscCheckFalse(rank > 1,PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),PETSC_ERR_PLIB,"Expected a sequential dense matrix"); 4498 } 4499 for (j=0;j<pcbddc->benign_n;j++) PetscCall(VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES)); 4500 PetscCall(VecAssemblyBegin(v)); 4501 PetscCall(VecAssemblyEnd(v)); 4502 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i,&v)); 4503 } 4504 PetscCall(VecResetArray(pcbddc->vec1_R)); 4505 } 4506 /* if n_R == 0 the object is not destroyed */ 4507 PetscCall(MatDestroy(&A_RV)); 4508 } 4509 PetscCall(VecDestroy(&dummy_vec)); 4510 4511 if (n_constraints) { 4512 Mat B; 4513 4514 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B)); 4515 PetscCall(MatScale(S_CC,m_one)); 4516 PetscCall(MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B)); 4517 PetscCall(MatProductSetType(B,MATPRODUCT_AB)); 4518 PetscCall(MatProductSetFromOptions(B)); 4519 PetscCall(MatProductSymbolic(B)); 4520 PetscCall(MatProductNumeric(B)); 4521 4522 PetscCall(MatScale(S_CC,m_one)); 4523 if (n_vertices) { 4524 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4525 PetscCall(MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC)); 4526 } else { 4527 Mat S_VCt; 4528 4529 if (lda_rhs != n_R) { 4530 PetscCall(MatDestroy(&B)); 4531 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B)); 4532 PetscCall(MatDenseSetLDA(B,lda_rhs)); 4533 } 4534 PetscCall(MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt)); 4535 PetscCall(MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN)); 4536 PetscCall(MatDestroy(&S_VCt)); 4537 } 4538 } 4539 PetscCall(MatDestroy(&B)); 4540 /* coarse basis functions */ 4541 for (i=0;i<n_constraints;i++) { 4542 Vec v; 4543 4544 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i)); 4545 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v)); 4546 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4547 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4548 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v)); 4549 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4550 PetscInt j; 4551 PetscScalar zero = 0.0; 4552 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v)); 4553 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4554 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4555 for (j=0;j<pcbddc->benign_n;j++) PetscCall(VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES)); 4556 PetscCall(VecAssemblyBegin(v)); 4557 PetscCall(VecAssemblyEnd(v)); 4558 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v)); 4559 } 4560 PetscCall(VecResetArray(pcbddc->vec1_R)); 4561 } 4562 } 4563 if (n_constraints) { 4564 PetscCall(MatDestroy(&local_auxmat2_R)); 4565 } 4566 PetscCall(PetscFree(p0_lidx_I)); 4567 4568 /* coarse matrix entries relative to B_0 */ 4569 if (pcbddc->benign_n) { 4570 Mat B0_B,B0_BPHI; 4571 IS is_dummy; 4572 const PetscScalar *data; 4573 PetscInt j; 4574 4575 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy)); 4576 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 4577 PetscCall(ISDestroy(&is_dummy)); 4578 PetscCall(MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI)); 4579 PetscCall(MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI)); 4580 PetscCall(MatDenseGetArrayRead(B0_BPHI,&data)); 4581 for (j=0;j<pcbddc->benign_n;j++) { 4582 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4583 for (i=0;i<pcbddc->local_primal_size;i++) { 4584 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4585 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4586 } 4587 } 4588 PetscCall(MatDenseRestoreArrayRead(B0_BPHI,&data)); 4589 PetscCall(MatDestroy(&B0_B)); 4590 PetscCall(MatDestroy(&B0_BPHI)); 4591 } 4592 4593 /* compute other basis functions for non-symmetric problems */ 4594 if (!pcbddc->symmetric_primal) { 4595 Mat B_V=NULL,B_C=NULL; 4596 PetscScalar *marray; 4597 4598 if (n_constraints) { 4599 Mat S_CCT,C_CRT; 4600 4601 PetscCall(MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT)); 4602 PetscCall(MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT)); 4603 PetscCall(MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C)); 4604 PetscCall(MatDestroy(&S_CCT)); 4605 if (n_vertices) { 4606 Mat S_VCT; 4607 4608 PetscCall(MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT)); 4609 PetscCall(MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V)); 4610 PetscCall(MatDestroy(&S_VCT)); 4611 } 4612 PetscCall(MatDestroy(&C_CRT)); 4613 } else { 4614 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V)); 4615 } 4616 if (n_vertices && n_R) { 4617 PetscScalar *av,*marray; 4618 const PetscInt *xadj,*adjncy; 4619 PetscInt n; 4620 PetscBool flg_row; 4621 4622 /* B_V = B_V - A_VR^T */ 4623 PetscCall(MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR)); 4624 PetscCall(MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4625 PetscCall(MatSeqAIJGetArray(A_VR,&av)); 4626 PetscCall(MatDenseGetArray(B_V,&marray)); 4627 for (i=0;i<n;i++) { 4628 PetscInt j; 4629 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4630 } 4631 PetscCall(MatDenseRestoreArray(B_V,&marray)); 4632 PetscCall(MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row)); 4633 PetscCall(MatDestroy(&A_VR)); 4634 } 4635 4636 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4637 if (n_vertices) { 4638 PetscCall(MatDenseGetArray(B_V,&marray)); 4639 for (i=0;i<n_vertices;i++) { 4640 PetscCall(VecPlaceArray(pcbddc->vec1_R,marray+i*n_R)); 4641 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*n_R)); 4642 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4643 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4644 PetscCall(VecResetArray(pcbddc->vec1_R)); 4645 PetscCall(VecResetArray(pcbddc->vec2_R)); 4646 } 4647 PetscCall(MatDenseRestoreArray(B_V,&marray)); 4648 } 4649 if (B_C) { 4650 PetscCall(MatDenseGetArray(B_C,&marray)); 4651 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4652 PetscCall(VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R)); 4653 PetscCall(VecPlaceArray(pcbddc->vec2_R,work+i*n_R)); 4654 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R)); 4655 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 4656 PetscCall(VecResetArray(pcbddc->vec1_R)); 4657 PetscCall(VecResetArray(pcbddc->vec2_R)); 4658 } 4659 PetscCall(MatDenseRestoreArray(B_C,&marray)); 4660 } 4661 /* coarse basis functions */ 4662 for (i=0;i<pcbddc->local_primal_size;i++) { 4663 Vec v; 4664 4665 PetscCall(VecPlaceArray(pcbddc->vec1_R,work+i*n_R)); 4666 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B,i,&v)); 4667 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4668 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4669 if (i<n_vertices) { 4670 PetscScalar one = 1.0; 4671 PetscCall(VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES)); 4672 PetscCall(VecAssemblyBegin(v)); 4673 PetscCall(VecAssemblyEnd(v)); 4674 } 4675 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B,i,&v)); 4676 4677 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4678 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D,i,&v)); 4679 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4680 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD)); 4681 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D,i,&v)); 4682 } 4683 PetscCall(VecResetArray(pcbddc->vec1_R)); 4684 } 4685 PetscCall(MatDestroy(&B_V)); 4686 PetscCall(MatDestroy(&B_C)); 4687 } 4688 4689 /* free memory */ 4690 PetscCall(PetscFree(idx_V_B)); 4691 PetscCall(MatDestroy(&S_VV)); 4692 PetscCall(MatDestroy(&S_CV)); 4693 PetscCall(MatDestroy(&S_VC)); 4694 PetscCall(MatDestroy(&S_CC)); 4695 PetscCall(PetscFree(work)); 4696 if (n_vertices) { 4697 PetscCall(MatDestroy(&A_VR)); 4698 } 4699 if (n_constraints) { 4700 PetscCall(MatDestroy(&C_CR)); 4701 } 4702 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0)); 4703 4704 /* Checking coarse_sub_mat and coarse basis functios */ 4705 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4706 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4707 if (pcbddc->dbg_flag) { 4708 Mat coarse_sub_mat; 4709 Mat AUXMAT,TM1,TM2,TM3,TM4; 4710 Mat coarse_phi_D,coarse_phi_B; 4711 Mat coarse_psi_D,coarse_psi_B; 4712 Mat A_II,A_BB,A_IB,A_BI; 4713 Mat C_B,CPHI; 4714 IS is_dummy; 4715 Vec mones; 4716 MatType checkmattype=MATSEQAIJ; 4717 PetscReal real_value; 4718 4719 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4720 Mat A; 4721 PetscCall(PCBDDCBenignProject(pc,NULL,NULL,&A)); 4722 PetscCall(MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II)); 4723 PetscCall(MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB)); 4724 PetscCall(MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI)); 4725 PetscCall(MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB)); 4726 PetscCall(MatDestroy(&A)); 4727 } else { 4728 PetscCall(MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II)); 4729 PetscCall(MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB)); 4730 PetscCall(MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI)); 4731 PetscCall(MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB)); 4732 } 4733 PetscCall(MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D)); 4734 PetscCall(MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B)); 4735 if (!pcbddc->symmetric_primal) { 4736 PetscCall(MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D)); 4737 PetscCall(MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B)); 4738 } 4739 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat)); 4740 4741 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 4742 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal)); 4743 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4744 if (!pcbddc->symmetric_primal) { 4745 PetscCall(MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4746 PetscCall(MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1)); 4747 PetscCall(MatDestroy(&AUXMAT)); 4748 PetscCall(MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4749 PetscCall(MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2)); 4750 PetscCall(MatDestroy(&AUXMAT)); 4751 PetscCall(MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4752 PetscCall(MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3)); 4753 PetscCall(MatDestroy(&AUXMAT)); 4754 PetscCall(MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4755 PetscCall(MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4)); 4756 PetscCall(MatDestroy(&AUXMAT)); 4757 } else { 4758 PetscCall(MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1)); 4759 PetscCall(MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2)); 4760 PetscCall(MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4761 PetscCall(MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3)); 4762 PetscCall(MatDestroy(&AUXMAT)); 4763 PetscCall(MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT)); 4764 PetscCall(MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4)); 4765 PetscCall(MatDestroy(&AUXMAT)); 4766 } 4767 PetscCall(MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN)); 4768 PetscCall(MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN)); 4769 PetscCall(MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN)); 4770 PetscCall(MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1)); 4771 if (pcbddc->benign_n) { 4772 Mat B0_B,B0_BPHI; 4773 const PetscScalar *data2; 4774 PetscScalar *data; 4775 PetscInt j; 4776 4777 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy)); 4778 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 4779 PetscCall(MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI)); 4780 PetscCall(MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI)); 4781 PetscCall(MatDenseGetArray(TM1,&data)); 4782 PetscCall(MatDenseGetArrayRead(B0_BPHI,&data2)); 4783 for (j=0;j<pcbddc->benign_n;j++) { 4784 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4785 for (i=0;i<pcbddc->local_primal_size;i++) { 4786 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4787 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4788 } 4789 } 4790 PetscCall(MatDenseRestoreArray(TM1,&data)); 4791 PetscCall(MatDenseRestoreArrayRead(B0_BPHI,&data2)); 4792 PetscCall(MatDestroy(&B0_B)); 4793 PetscCall(ISDestroy(&is_dummy)); 4794 PetscCall(MatDestroy(&B0_BPHI)); 4795 } 4796 #if 0 4797 { 4798 PetscViewer viewer; 4799 char filename[256]; 4800 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4801 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 4802 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 4803 PetscCall(PetscObjectSetName((PetscObject)coarse_sub_mat,"computed")); 4804 PetscCall(MatView(coarse_sub_mat,viewer)); 4805 PetscCall(PetscObjectSetName((PetscObject)TM1,"projected")); 4806 PetscCall(MatView(TM1,viewer)); 4807 if (pcbddc->coarse_phi_B) { 4808 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 4809 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 4810 } 4811 if (pcbddc->coarse_phi_D) { 4812 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 4813 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 4814 } 4815 if (pcbddc->coarse_psi_B) { 4816 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 4817 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 4818 } 4819 if (pcbddc->coarse_psi_D) { 4820 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 4821 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 4822 } 4823 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 4824 PetscCall(MatView(pcbddc->local_mat,viewer)); 4825 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 4826 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 4827 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 4828 PetscCall(ISView(pcis->is_I_local,viewer)); 4829 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 4830 PetscCall(ISView(pcis->is_B_local,viewer)); 4831 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 4832 PetscCall(ISView(pcbddc->is_R_local,viewer)); 4833 PetscCall(PetscViewerDestroy(&viewer)); 4834 } 4835 #endif 4836 PetscCall(MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN)); 4837 PetscCall(MatNorm(TM1,NORM_FROBENIUS,&real_value)); 4838 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 4839 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value)); 4840 4841 /* check constraints */ 4842 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy)); 4843 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B)); 4844 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4845 PetscCall(MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI)); 4846 } else { 4847 PetscScalar *data; 4848 Mat tmat; 4849 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B,&data)); 4850 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat)); 4851 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B,&data)); 4852 PetscCall(MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI)); 4853 PetscCall(MatDestroy(&tmat)); 4854 } 4855 PetscCall(MatCreateVecs(CPHI,&mones,NULL)); 4856 PetscCall(VecSet(mones,-1.0)); 4857 PetscCall(MatDiagonalSet(CPHI,mones,ADD_VALUES)); 4858 PetscCall(MatNorm(CPHI,NORM_FROBENIUS,&real_value)); 4859 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value)); 4860 if (!pcbddc->symmetric_primal) { 4861 PetscCall(MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI)); 4862 PetscCall(VecSet(mones,-1.0)); 4863 PetscCall(MatDiagonalSet(CPHI,mones,ADD_VALUES)); 4864 PetscCall(MatNorm(CPHI,NORM_FROBENIUS,&real_value)); 4865 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value)); 4866 } 4867 PetscCall(MatDestroy(&C_B)); 4868 PetscCall(MatDestroy(&CPHI)); 4869 PetscCall(ISDestroy(&is_dummy)); 4870 PetscCall(VecDestroy(&mones)); 4871 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4872 PetscCall(MatDestroy(&A_II)); 4873 PetscCall(MatDestroy(&A_BB)); 4874 PetscCall(MatDestroy(&A_IB)); 4875 PetscCall(MatDestroy(&A_BI)); 4876 PetscCall(MatDestroy(&TM1)); 4877 PetscCall(MatDestroy(&TM2)); 4878 PetscCall(MatDestroy(&TM3)); 4879 PetscCall(MatDestroy(&TM4)); 4880 PetscCall(MatDestroy(&coarse_phi_D)); 4881 PetscCall(MatDestroy(&coarse_phi_B)); 4882 if (!pcbddc->symmetric_primal) { 4883 PetscCall(MatDestroy(&coarse_psi_D)); 4884 PetscCall(MatDestroy(&coarse_psi_B)); 4885 } 4886 PetscCall(MatDestroy(&coarse_sub_mat)); 4887 } 4888 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4889 { 4890 PetscBool gpu; 4891 4892 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu)); 4893 if (gpu) { 4894 if (pcbddc->local_auxmat1) { 4895 PetscCall(MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1)); 4896 } 4897 if (pcbddc->local_auxmat2) { 4898 PetscCall(MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2)); 4899 } 4900 if (pcbddc->coarse_phi_B) { 4901 PetscCall(MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B)); 4902 } 4903 if (pcbddc->coarse_phi_D) { 4904 PetscCall(MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D)); 4905 } 4906 if (pcbddc->coarse_psi_B) { 4907 PetscCall(MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B)); 4908 } 4909 if (pcbddc->coarse_psi_D) { 4910 PetscCall(MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D)); 4911 } 4912 } 4913 } 4914 /* get back data */ 4915 *coarse_submat_vals_n = coarse_submat_vals; 4916 PetscFunctionReturn(0); 4917 } 4918 4919 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4920 { 4921 Mat *work_mat; 4922 IS isrow_s,iscol_s; 4923 PetscBool rsorted,csorted; 4924 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4925 4926 PetscFunctionBegin; 4927 PetscCall(ISSorted(isrow,&rsorted)); 4928 PetscCall(ISSorted(iscol,&csorted)); 4929 PetscCall(ISGetLocalSize(isrow,&rsize)); 4930 PetscCall(ISGetLocalSize(iscol,&csize)); 4931 4932 if (!rsorted) { 4933 const PetscInt *idxs; 4934 PetscInt *idxs_sorted,i; 4935 4936 PetscCall(PetscMalloc1(rsize,&idxs_perm_r)); 4937 PetscCall(PetscMalloc1(rsize,&idxs_sorted)); 4938 for (i=0;i<rsize;i++) { 4939 idxs_perm_r[i] = i; 4940 } 4941 PetscCall(ISGetIndices(isrow,&idxs)); 4942 PetscCall(PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r)); 4943 for (i=0;i<rsize;i++) { 4944 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4945 } 4946 PetscCall(ISRestoreIndices(isrow,&idxs)); 4947 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s)); 4948 } else { 4949 PetscCall(PetscObjectReference((PetscObject)isrow)); 4950 isrow_s = isrow; 4951 } 4952 4953 if (!csorted) { 4954 if (isrow == iscol) { 4955 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 4956 iscol_s = isrow_s; 4957 } else { 4958 const PetscInt *idxs; 4959 PetscInt *idxs_sorted,i; 4960 4961 PetscCall(PetscMalloc1(csize,&idxs_perm_c)); 4962 PetscCall(PetscMalloc1(csize,&idxs_sorted)); 4963 for (i=0;i<csize;i++) { 4964 idxs_perm_c[i] = i; 4965 } 4966 PetscCall(ISGetIndices(iscol,&idxs)); 4967 PetscCall(PetscSortIntWithPermutation(csize,idxs,idxs_perm_c)); 4968 for (i=0;i<csize;i++) { 4969 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4970 } 4971 PetscCall(ISRestoreIndices(iscol,&idxs)); 4972 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s)); 4973 } 4974 } else { 4975 PetscCall(PetscObjectReference((PetscObject)iscol)); 4976 iscol_s = iscol; 4977 } 4978 4979 PetscCall(MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat)); 4980 4981 if (!rsorted || !csorted) { 4982 Mat new_mat; 4983 IS is_perm_r,is_perm_c; 4984 4985 if (!rsorted) { 4986 PetscInt *idxs_r,i; 4987 PetscCall(PetscMalloc1(rsize,&idxs_r)); 4988 for (i=0;i<rsize;i++) { 4989 idxs_r[idxs_perm_r[i]] = i; 4990 } 4991 PetscCall(PetscFree(idxs_perm_r)); 4992 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r)); 4993 } else { 4994 PetscCall(ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r)); 4995 } 4996 PetscCall(ISSetPermutation(is_perm_r)); 4997 4998 if (!csorted) { 4999 if (isrow_s == iscol_s) { 5000 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 5001 is_perm_c = is_perm_r; 5002 } else { 5003 PetscInt *idxs_c,i; 5004 PetscCheck(idxs_perm_c,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 5005 PetscCall(PetscMalloc1(csize,&idxs_c)); 5006 for (i=0;i<csize;i++) { 5007 idxs_c[idxs_perm_c[i]] = i; 5008 } 5009 PetscCall(PetscFree(idxs_perm_c)); 5010 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c)); 5011 } 5012 } else { 5013 PetscCall(ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c)); 5014 } 5015 PetscCall(ISSetPermutation(is_perm_c)); 5016 5017 PetscCall(MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat)); 5018 PetscCall(MatDestroy(&work_mat[0])); 5019 work_mat[0] = new_mat; 5020 PetscCall(ISDestroy(&is_perm_r)); 5021 PetscCall(ISDestroy(&is_perm_c)); 5022 } 5023 5024 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 5025 *B = work_mat[0]; 5026 PetscCall(MatDestroyMatrices(1,&work_mat)); 5027 PetscCall(ISDestroy(&isrow_s)); 5028 PetscCall(ISDestroy(&iscol_s)); 5029 PetscFunctionReturn(0); 5030 } 5031 5032 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5033 { 5034 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5035 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5036 Mat new_mat,lA; 5037 IS is_local,is_global; 5038 PetscInt local_size; 5039 PetscBool isseqaij; 5040 5041 PetscFunctionBegin; 5042 PetscCall(MatDestroy(&pcbddc->local_mat)); 5043 PetscCall(MatGetSize(matis->A,&local_size,NULL)); 5044 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local)); 5045 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping,is_local,&is_global)); 5046 PetscCall(ISDestroy(&is_local)); 5047 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat)); 5048 PetscCall(ISDestroy(&is_global)); 5049 5050 if (pcbddc->dbg_flag) { 5051 Vec x,x_change; 5052 PetscReal error; 5053 5054 PetscCall(MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change)); 5055 PetscCall(VecSetRandom(x,NULL)); 5056 PetscCall(MatMult(ChangeOfBasisMatrix,x,x_change)); 5057 PetscCall(VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD)); 5058 PetscCall(VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD)); 5059 PetscCall(MatMult(new_mat,matis->x,matis->y)); 5060 if (!pcbddc->change_interior) { 5061 const PetscScalar *x,*y,*v; 5062 PetscReal lerror = 0.; 5063 PetscInt i; 5064 5065 PetscCall(VecGetArrayRead(matis->x,&x)); 5066 PetscCall(VecGetArrayRead(matis->y,&y)); 5067 PetscCall(VecGetArrayRead(matis->counter,&v)); 5068 for (i=0;i<local_size;i++) 5069 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 5070 lerror = PetscAbsScalar(x[i]-y[i]); 5071 PetscCall(VecRestoreArrayRead(matis->x,&x)); 5072 PetscCall(VecRestoreArrayRead(matis->y,&y)); 5073 PetscCall(VecRestoreArrayRead(matis->counter,&v)); 5074 PetscCallMPI(MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)pc))); 5075 if (error > PETSC_SMALL) { 5076 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5077 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error); 5078 } else { 5079 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error); 5080 } 5081 } 5082 } 5083 PetscCall(VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE)); 5084 PetscCall(VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE)); 5085 PetscCall(VecAXPY(x,-1.0,x_change)); 5086 PetscCall(VecNorm(x,NORM_INFINITY,&error)); 5087 if (error > PETSC_SMALL) { 5088 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5089 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 5090 } else { 5091 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error); 5092 } 5093 } 5094 PetscCall(VecDestroy(&x)); 5095 PetscCall(VecDestroy(&x_change)); 5096 } 5097 5098 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5099 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA)); 5100 5101 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5102 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij)); 5103 if (isseqaij) { 5104 PetscCall(MatDestroy(&pcbddc->local_mat)); 5105 PetscCall(MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat)); 5106 if (lA) { 5107 Mat work; 5108 PetscCall(MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work)); 5109 PetscCall(PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work)); 5110 PetscCall(MatDestroy(&work)); 5111 } 5112 } else { 5113 Mat work_mat; 5114 5115 PetscCall(MatDestroy(&pcbddc->local_mat)); 5116 PetscCall(MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat)); 5117 PetscCall(MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat)); 5118 PetscCall(MatDestroy(&work_mat)); 5119 if (lA) { 5120 Mat work; 5121 PetscCall(MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat)); 5122 PetscCall(MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work)); 5123 PetscCall(PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work)); 5124 PetscCall(MatDestroy(&work)); 5125 } 5126 } 5127 if (matis->A->symmetric_set) { 5128 PetscCall(MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric)); 5129 #if !defined(PETSC_USE_COMPLEX) 5130 PetscCall(MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric)); 5131 #endif 5132 } 5133 PetscCall(MatDestroy(&new_mat)); 5134 PetscFunctionReturn(0); 5135 } 5136 5137 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5138 { 5139 PC_IS* pcis = (PC_IS*)(pc->data); 5140 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5141 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5142 PetscInt *idx_R_local=NULL; 5143 PetscInt n_vertices,i,j,n_R,n_D,n_B; 5144 PetscInt vbs,bs; 5145 PetscBT bitmask=NULL; 5146 5147 PetscFunctionBegin; 5148 /* 5149 No need to setup local scatters if 5150 - primal space is unchanged 5151 AND 5152 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5153 AND 5154 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5155 */ 5156 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5157 PetscFunctionReturn(0); 5158 } 5159 /* destroy old objects */ 5160 PetscCall(ISDestroy(&pcbddc->is_R_local)); 5161 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 5162 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 5163 /* Set Non-overlapping dimensions */ 5164 n_B = pcis->n_B; 5165 n_D = pcis->n - n_B; 5166 n_vertices = pcbddc->n_vertices; 5167 5168 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5169 5170 /* create auxiliary bitmask and allocate workspace */ 5171 if (!sub_schurs || !sub_schurs->reuse_solver) { 5172 PetscCall(PetscMalloc1(pcis->n-n_vertices,&idx_R_local)); 5173 PetscCall(PetscBTCreate(pcis->n,&bitmask)); 5174 for (i=0;i<n_vertices;i++) { 5175 PetscCall(PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i])); 5176 } 5177 5178 for (i=0, n_R=0; i<pcis->n; i++) { 5179 if (!PetscBTLookup(bitmask,i)) { 5180 idx_R_local[n_R++] = i; 5181 } 5182 } 5183 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5184 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5185 5186 PetscCall(ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local)); 5187 PetscCall(ISGetLocalSize(reuse_solver->is_R,&n_R)); 5188 } 5189 5190 /* Block code */ 5191 vbs = 1; 5192 PetscCall(MatGetBlockSize(pcbddc->local_mat,&bs)); 5193 if (bs>1 && !(n_vertices%bs)) { 5194 PetscBool is_blocked = PETSC_TRUE; 5195 PetscInt *vary; 5196 if (!sub_schurs || !sub_schurs->reuse_solver) { 5197 PetscCall(PetscMalloc1(pcis->n/bs,&vary)); 5198 PetscCall(PetscArrayzero(vary,pcis->n/bs)); 5199 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5200 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5201 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5202 for (i=0; i<pcis->n/bs; i++) { 5203 if (vary[i]!=0 && vary[i]!=bs) { 5204 is_blocked = PETSC_FALSE; 5205 break; 5206 } 5207 } 5208 PetscCall(PetscFree(vary)); 5209 } else { 5210 /* Verify directly the R set */ 5211 for (i=0; i<n_R/bs; i++) { 5212 PetscInt j,node=idx_R_local[bs*i]; 5213 for (j=1; j<bs; j++) { 5214 if (node != idx_R_local[bs*i+j]-j) { 5215 is_blocked = PETSC_FALSE; 5216 break; 5217 } 5218 } 5219 } 5220 } 5221 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5222 vbs = bs; 5223 for (i=0;i<n_R/vbs;i++) { 5224 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5225 } 5226 } 5227 } 5228 PetscCall(ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local)); 5229 if (sub_schurs && sub_schurs->reuse_solver) { 5230 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5231 5232 PetscCall(ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local)); 5233 PetscCall(ISDestroy(&reuse_solver->is_R)); 5234 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5235 reuse_solver->is_R = pcbddc->is_R_local; 5236 } else { 5237 PetscCall(PetscFree(idx_R_local)); 5238 } 5239 5240 /* print some info if requested */ 5241 if (pcbddc->dbg_flag) { 5242 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 5243 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5244 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5245 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank)); 5246 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B)); 5247 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size)); 5248 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5249 } 5250 5251 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5252 if (!sub_schurs || !sub_schurs->reuse_solver) { 5253 IS is_aux1,is_aux2; 5254 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5255 5256 PetscCall(ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local)); 5257 PetscCall(PetscMalloc1(pcis->n_B-n_vertices,&aux_array1)); 5258 PetscCall(PetscMalloc1(pcis->n_B-n_vertices,&aux_array2)); 5259 PetscCall(ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices)); 5260 for (i=0; i<n_D; i++) { 5261 PetscCall(PetscBTSet(bitmask,is_indices[i])); 5262 } 5263 PetscCall(ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices)); 5264 for (i=0, j=0; i<n_R; i++) { 5265 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5266 aux_array1[j++] = i; 5267 } 5268 } 5269 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1)); 5270 PetscCall(ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices)); 5271 for (i=0, j=0; i<n_B; i++) { 5272 if (!PetscBTLookup(bitmask,is_indices[i])) { 5273 aux_array2[j++] = i; 5274 } 5275 } 5276 PetscCall(ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices)); 5277 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2)); 5278 PetscCall(VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B)); 5279 PetscCall(ISDestroy(&is_aux1)); 5280 PetscCall(ISDestroy(&is_aux2)); 5281 5282 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5283 PetscCall(PetscMalloc1(n_D,&aux_array1)); 5284 for (i=0, j=0; i<n_R; i++) { 5285 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5286 aux_array1[j++] = i; 5287 } 5288 } 5289 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1)); 5290 PetscCall(VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D)); 5291 PetscCall(ISDestroy(&is_aux1)); 5292 } 5293 PetscCall(PetscBTDestroy(&bitmask)); 5294 PetscCall(ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local)); 5295 } else { 5296 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5297 IS tis; 5298 PetscInt schur_size; 5299 5300 PetscCall(ISGetLocalSize(reuse_solver->is_B,&schur_size)); 5301 PetscCall(ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis)); 5302 PetscCall(VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B)); 5303 PetscCall(ISDestroy(&tis)); 5304 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5305 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis)); 5306 PetscCall(VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D)); 5307 PetscCall(ISDestroy(&tis)); 5308 } 5309 } 5310 PetscFunctionReturn(0); 5311 } 5312 5313 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5314 { 5315 MatNullSpace NullSpace; 5316 Mat dmat; 5317 const Vec *nullvecs; 5318 Vec v,v2,*nullvecs2; 5319 VecScatter sct = NULL; 5320 PetscContainer c; 5321 PetscScalar *ddata; 5322 PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs; 5323 PetscBool nnsp_has_cnst; 5324 5325 PetscFunctionBegin; 5326 if (!is && !B) { /* MATIS */ 5327 Mat_IS* matis = (Mat_IS*)A->data; 5328 5329 if (!B) { 5330 PetscCall(MatISGetLocalMat(A,&B)); 5331 } 5332 sct = matis->cctx; 5333 PetscCall(PetscObjectReference((PetscObject)sct)); 5334 } else { 5335 PetscCall(MatGetNullSpace(B,&NullSpace)); 5336 if (!NullSpace) { 5337 PetscCall(MatGetNearNullSpace(B,&NullSpace)); 5338 } 5339 if (NullSpace) PetscFunctionReturn(0); 5340 } 5341 PetscCall(MatGetNullSpace(A,&NullSpace)); 5342 if (!NullSpace) { 5343 PetscCall(MatGetNearNullSpace(A,&NullSpace)); 5344 } 5345 if (!NullSpace) PetscFunctionReturn(0); 5346 5347 PetscCall(MatCreateVecs(A,&v,NULL)); 5348 PetscCall(MatCreateVecs(B,&v2,NULL)); 5349 if (!sct) { 5350 PetscCall(VecScatterCreate(v,is,v2,NULL,&sct)); 5351 } 5352 PetscCall(MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs)); 5353 bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst; 5354 PetscCall(PetscMalloc1(bsiz,&nullvecs2)); 5355 PetscCall(VecGetBlockSize(v2,&bs)); 5356 PetscCall(VecGetSize(v2,&N)); 5357 PetscCall(VecGetLocalSize(v2,&n)); 5358 PetscCall(PetscMalloc1(n*bsiz,&ddata)); 5359 for (k=0;k<nnsp_size;k++) { 5360 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k])); 5361 PetscCall(VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD)); 5362 PetscCall(VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD)); 5363 } 5364 if (nnsp_has_cnst) { 5365 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size])); 5366 PetscCall(VecSet(nullvecs2[nnsp_size],1.0)); 5367 } 5368 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2)); 5369 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace)); 5370 5371 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat)); 5372 PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)B),&c)); 5373 PetscCall(PetscContainerSetPointer(c,ddata)); 5374 PetscCall(PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault)); 5375 PetscCall(PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c)); 5376 PetscCall(PetscContainerDestroy(&c)); 5377 PetscCall(PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat)); 5378 PetscCall(MatDestroy(&dmat)); 5379 5380 for (k=0;k<bsiz;k++) { 5381 PetscCall(VecDestroy(&nullvecs2[k])); 5382 } 5383 PetscCall(PetscFree(nullvecs2)); 5384 PetscCall(MatSetNearNullSpace(B,NullSpace)); 5385 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5386 PetscCall(VecDestroy(&v)); 5387 PetscCall(VecDestroy(&v2)); 5388 PetscCall(VecScatterDestroy(&sct)); 5389 PetscFunctionReturn(0); 5390 } 5391 5392 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5393 { 5394 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5395 PC_IS *pcis = (PC_IS*)pc->data; 5396 PC pc_temp; 5397 Mat A_RR; 5398 MatNullSpace nnsp; 5399 MatReuse reuse; 5400 PetscScalar m_one = -1.0; 5401 PetscReal value; 5402 PetscInt n_D,n_R; 5403 PetscBool issbaij,opts; 5404 void (*f)(void) = NULL; 5405 char dir_prefix[256],neu_prefix[256],str_level[16]; 5406 size_t len; 5407 5408 PetscFunctionBegin; 5409 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0)); 5410 /* approximate solver, propagate NearNullSpace if needed */ 5411 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5412 MatNullSpace gnnsp1,gnnsp2; 5413 PetscBool lhas,ghas; 5414 5415 PetscCall(MatGetNearNullSpace(pcbddc->local_mat,&nnsp)); 5416 PetscCall(MatGetNearNullSpace(pc->pmat,&gnnsp1)); 5417 PetscCall(MatGetNullSpace(pc->pmat,&gnnsp2)); 5418 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5419 PetscCallMPI(MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 5420 if (!ghas && (gnnsp1 || gnnsp2)) { 5421 PetscCall(MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL)); 5422 } 5423 } 5424 5425 /* compute prefixes */ 5426 PetscCall(PetscStrcpy(dir_prefix,"")); 5427 PetscCall(PetscStrcpy(neu_prefix,"")); 5428 if (!pcbddc->current_level) { 5429 PetscCall(PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix))); 5430 PetscCall(PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix))); 5431 PetscCall(PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix))); 5432 PetscCall(PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix))); 5433 } else { 5434 PetscCall(PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level))); 5435 PetscCall(PetscStrlen(((PetscObject)pc)->prefix,&len)); 5436 len -= 15; /* remove "pc_bddc_coarse_" */ 5437 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5438 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5439 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5440 PetscCall(PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1)); 5441 PetscCall(PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1)); 5442 PetscCall(PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix))); 5443 PetscCall(PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix))); 5444 PetscCall(PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix))); 5445 PetscCall(PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix))); 5446 } 5447 5448 /* DIRICHLET PROBLEM */ 5449 if (dirichlet) { 5450 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5451 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5452 PetscCheckFalse(!sub_schurs || !sub_schurs->reuse_solver,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 5453 if (pcbddc->dbg_flag) { 5454 Mat A_IIn; 5455 5456 PetscCall(PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn)); 5457 PetscCall(MatDestroy(&pcis->A_II)); 5458 pcis->A_II = A_IIn; 5459 } 5460 } 5461 if (pcbddc->local_mat->symmetric_set) { 5462 PetscCall(MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric)); 5463 } 5464 /* Matrix for Dirichlet problem is pcis->A_II */ 5465 n_D = pcis->n - pcis->n_B; 5466 opts = PETSC_FALSE; 5467 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5468 opts = PETSC_TRUE; 5469 PetscCall(KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D)); 5470 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1)); 5471 /* default */ 5472 PetscCall(KSPSetType(pcbddc->ksp_D,KSPPREONLY)); 5473 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix)); 5474 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij)); 5475 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5476 if (issbaij) { 5477 PetscCall(PCSetType(pc_temp,PCCHOLESKY)); 5478 } else { 5479 PetscCall(PCSetType(pc_temp,PCLU)); 5480 } 5481 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure)); 5482 } 5483 PetscCall(MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix)); 5484 PetscCall(KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II)); 5485 /* Allow user's customization */ 5486 if (opts) { 5487 PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 5488 } 5489 PetscCall(MatGetNearNullSpace(pcis->pA_II,&nnsp)); 5490 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5491 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II)); 5492 } 5493 PetscCall(MatGetNearNullSpace(pcis->pA_II,&nnsp)); 5494 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5495 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f)); 5496 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5497 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5498 const PetscInt *idxs; 5499 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5500 5501 PetscCall(ISGetLocalSize(pcis->is_I_local,&nl)); 5502 PetscCall(ISGetIndices(pcis->is_I_local,&idxs)); 5503 PetscCall(PetscMalloc1(nl*cdim,&scoords)); 5504 for (i=0;i<nl;i++) { 5505 for (d=0;d<cdim;d++) { 5506 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5507 } 5508 } 5509 PetscCall(ISRestoreIndices(pcis->is_I_local,&idxs)); 5510 PetscCall(PCSetCoordinates(pc_temp,cdim,nl,scoords)); 5511 PetscCall(PetscFree(scoords)); 5512 } 5513 if (sub_schurs && sub_schurs->reuse_solver) { 5514 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5515 5516 PetscCall(KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver)); 5517 } 5518 5519 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5520 if (!n_D) { 5521 PetscCall(KSPGetPC(pcbddc->ksp_D,&pc_temp)); 5522 PetscCall(PCSetType(pc_temp,PCNONE)); 5523 } 5524 PetscCall(KSPSetUp(pcbddc->ksp_D)); 5525 /* set ksp_D into pcis data */ 5526 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 5527 PetscCall(KSPDestroy(&pcis->ksp_D)); 5528 pcis->ksp_D = pcbddc->ksp_D; 5529 } 5530 5531 /* NEUMANN PROBLEM */ 5532 A_RR = NULL; 5533 if (neumann) { 5534 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5535 PetscInt ibs,mbs; 5536 PetscBool issbaij, reuse_neumann_solver; 5537 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5538 5539 reuse_neumann_solver = PETSC_FALSE; 5540 if (sub_schurs && sub_schurs->reuse_solver) { 5541 IS iP; 5542 5543 reuse_neumann_solver = PETSC_TRUE; 5544 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP)); 5545 if (iP) reuse_neumann_solver = PETSC_FALSE; 5546 } 5547 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5548 PetscCall(ISGetSize(pcbddc->is_R_local,&n_R)); 5549 if (pcbddc->ksp_R) { /* already created ksp */ 5550 PetscInt nn_R; 5551 PetscCall(KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR)); 5552 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5553 PetscCall(MatGetSize(A_RR,&nn_R,NULL)); 5554 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5555 PetscCall(KSPReset(pcbddc->ksp_R)); 5556 PetscCall(MatDestroy(&A_RR)); 5557 reuse = MAT_INITIAL_MATRIX; 5558 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5559 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5560 PetscCall(MatDestroy(&A_RR)); 5561 reuse = MAT_INITIAL_MATRIX; 5562 } else { /* safe to reuse the matrix */ 5563 reuse = MAT_REUSE_MATRIX; 5564 } 5565 } 5566 /* last check */ 5567 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5568 PetscCall(MatDestroy(&A_RR)); 5569 reuse = MAT_INITIAL_MATRIX; 5570 } 5571 } else { /* first time, so we need to create the matrix */ 5572 reuse = MAT_INITIAL_MATRIX; 5573 } 5574 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5575 TODO: Get Rid of these conversions */ 5576 PetscCall(MatGetBlockSize(pcbddc->local_mat,&mbs)); 5577 PetscCall(ISGetBlockSize(pcbddc->is_R_local,&ibs)); 5578 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij)); 5579 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5580 if (matis->A == pcbddc->local_mat) { 5581 PetscCall(MatDestroy(&pcbddc->local_mat)); 5582 PetscCall(MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat)); 5583 } else { 5584 PetscCall(MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat)); 5585 } 5586 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5587 if (matis->A == pcbddc->local_mat) { 5588 PetscCall(MatDestroy(&pcbddc->local_mat)); 5589 PetscCall(MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat)); 5590 } else { 5591 PetscCall(MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat)); 5592 } 5593 } 5594 /* extract A_RR */ 5595 if (reuse_neumann_solver) { 5596 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5597 5598 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5599 PetscCall(MatDestroy(&A_RR)); 5600 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5601 PetscCall(PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR)); 5602 } else { 5603 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR)); 5604 } 5605 } else { 5606 PetscCall(MatDestroy(&A_RR)); 5607 PetscCall(PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL)); 5608 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5609 } 5610 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5611 PetscCall(MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR)); 5612 } 5613 if (pcbddc->local_mat->symmetric_set) { 5614 PetscCall(MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric)); 5615 } 5616 opts = PETSC_FALSE; 5617 if (!pcbddc->ksp_R) { /* create object if not present */ 5618 opts = PETSC_TRUE; 5619 PetscCall(KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R)); 5620 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1)); 5621 /* default */ 5622 PetscCall(KSPSetType(pcbddc->ksp_R,KSPPREONLY)); 5623 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix)); 5624 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5625 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij)); 5626 if (issbaij) { 5627 PetscCall(PCSetType(pc_temp,PCCHOLESKY)); 5628 } else { 5629 PetscCall(PCSetType(pc_temp,PCLU)); 5630 } 5631 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure)); 5632 } 5633 PetscCall(KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR)); 5634 PetscCall(MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix)); 5635 if (opts) { /* Allow user's customization once */ 5636 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 5637 } 5638 PetscCall(MatGetNearNullSpace(A_RR,&nnsp)); 5639 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5640 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR)); 5641 } 5642 PetscCall(MatGetNearNullSpace(A_RR,&nnsp)); 5643 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5644 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f)); 5645 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5646 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5647 const PetscInt *idxs; 5648 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5649 5650 PetscCall(ISGetLocalSize(pcbddc->is_R_local,&nl)); 5651 PetscCall(ISGetIndices(pcbddc->is_R_local,&idxs)); 5652 PetscCall(PetscMalloc1(nl*cdim,&scoords)); 5653 for (i=0;i<nl;i++) { 5654 for (d=0;d<cdim;d++) { 5655 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5656 } 5657 } 5658 PetscCall(ISRestoreIndices(pcbddc->is_R_local,&idxs)); 5659 PetscCall(PCSetCoordinates(pc_temp,cdim,nl,scoords)); 5660 PetscCall(PetscFree(scoords)); 5661 } 5662 5663 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5664 if (!n_R) { 5665 PetscCall(KSPGetPC(pcbddc->ksp_R,&pc_temp)); 5666 PetscCall(PCSetType(pc_temp,PCNONE)); 5667 } 5668 /* Reuse solver if it is present */ 5669 if (reuse_neumann_solver) { 5670 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5671 5672 PetscCall(KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver)); 5673 } 5674 PetscCall(KSPSetUp(pcbddc->ksp_R)); 5675 } 5676 5677 if (pcbddc->dbg_flag) { 5678 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5679 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5680 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 5681 } 5682 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0)); 5683 5684 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5685 if (pcbddc->NullSpace_corr[0]) { 5686 PetscCall(PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE)); 5687 } 5688 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5689 PetscCall(PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1])); 5690 } 5691 if (neumann && pcbddc->NullSpace_corr[2]) { 5692 PetscCall(PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3])); 5693 } 5694 /* check Dirichlet and Neumann solvers */ 5695 if (pcbddc->dbg_flag) { 5696 if (dirichlet) { /* Dirichlet */ 5697 PetscCall(VecSetRandom(pcis->vec1_D,NULL)); 5698 PetscCall(MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D)); 5699 PetscCall(KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D)); 5700 PetscCall(KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D)); 5701 PetscCall(VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D)); 5702 PetscCall(VecNorm(pcis->vec1_D,NORM_INFINITY,&value)); 5703 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value)); 5704 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5705 } 5706 if (neumann) { /* Neumann */ 5707 PetscCall(VecSetRandom(pcbddc->vec1_R,NULL)); 5708 PetscCall(MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R)); 5709 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R)); 5710 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R)); 5711 PetscCall(VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R)); 5712 PetscCall(VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value)); 5713 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value)); 5714 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5715 } 5716 } 5717 /* free Neumann problem's matrix */ 5718 PetscCall(MatDestroy(&A_RR)); 5719 PetscFunctionReturn(0); 5720 } 5721 5722 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5723 { 5724 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5725 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5726 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 5727 5728 PetscFunctionBegin; 5729 if (!reuse_solver) { 5730 PetscCall(VecSet(pcbddc->vec1_R,0.)); 5731 } 5732 if (!pcbddc->switch_static) { 5733 if (applytranspose && pcbddc->local_auxmat1) { 5734 PetscCall(MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C)); 5735 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B)); 5736 } 5737 if (!reuse_solver) { 5738 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5739 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5740 } else { 5741 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5742 5743 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD)); 5744 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD)); 5745 } 5746 } else { 5747 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5748 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5749 PetscCall(VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5750 PetscCall(VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5751 if (applytranspose && pcbddc->local_auxmat1) { 5752 PetscCall(MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C)); 5753 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B)); 5754 PetscCall(VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5755 PetscCall(VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE)); 5756 } 5757 } 5758 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0)); 5759 if (!reuse_solver || pcbddc->switch_static) { 5760 if (applytranspose) { 5761 PetscCall(KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R)); 5762 } else { 5763 PetscCall(KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R)); 5764 } 5765 PetscCall(KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R)); 5766 } else { 5767 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5768 5769 if (applytranspose) { 5770 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B)); 5771 } else { 5772 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B)); 5773 } 5774 } 5775 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0)); 5776 PetscCall(VecSet(inout_B,0.)); 5777 if (!pcbddc->switch_static) { 5778 if (!reuse_solver) { 5779 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5780 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5781 } else { 5782 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5783 5784 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE)); 5785 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE)); 5786 } 5787 if (!applytranspose && pcbddc->local_auxmat1) { 5788 PetscCall(MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C)); 5789 PetscCall(MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B)); 5790 } 5791 } else { 5792 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5793 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5794 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5795 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5796 if (!applytranspose && pcbddc->local_auxmat1) { 5797 PetscCall(MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C)); 5798 PetscCall(MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R)); 5799 } 5800 PetscCall(VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5801 PetscCall(VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD)); 5802 PetscCall(VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5803 PetscCall(VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD)); 5804 } 5805 PetscFunctionReturn(0); 5806 } 5807 5808 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5809 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5810 { 5811 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5812 PC_IS* pcis = (PC_IS*) (pc->data); 5813 const PetscScalar zero = 0.0; 5814 5815 PetscFunctionBegin; 5816 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5817 if (!pcbddc->benign_apply_coarse_only) { 5818 if (applytranspose) { 5819 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P)); 5820 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P)); 5821 } else { 5822 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P)); 5823 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P)); 5824 } 5825 } else { 5826 PetscCall(VecSet(pcbddc->vec1_P,zero)); 5827 } 5828 5829 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5830 if (pcbddc->benign_n) { 5831 PetscScalar *array; 5832 PetscInt j; 5833 5834 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 5835 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5836 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 5837 } 5838 5839 /* start communications from local primal nodes to rhs of coarse solver */ 5840 PetscCall(VecSet(pcbddc->coarse_vec,zero)); 5841 PetscCall(PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD)); 5842 PetscCall(PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD)); 5843 5844 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5845 if (pcbddc->coarse_ksp) { 5846 Mat coarse_mat; 5847 Vec rhs,sol; 5848 MatNullSpace nullsp; 5849 PetscBool isbddc = PETSC_FALSE; 5850 5851 if (pcbddc->benign_have_null) { 5852 PC coarse_pc; 5853 5854 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5855 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc)); 5856 /* we need to propagate to coarser levels the need for a possible benign correction */ 5857 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5858 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5859 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5860 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5861 } 5862 } 5863 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&rhs)); 5864 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&sol)); 5865 PetscCall(KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL)); 5866 if (applytranspose) { 5867 PetscCheck(!pcbddc->benign_apply_coarse_only,PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5868 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5869 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol)); 5870 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5871 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp,pc,sol)); 5872 PetscCall(MatGetTransposeNullSpace(coarse_mat,&nullsp)); 5873 if (nullsp) { 5874 PetscCall(MatNullSpaceRemove(nullsp,sol)); 5875 } 5876 } else { 5877 PetscCall(MatGetNullSpace(coarse_mat,&nullsp)); 5878 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5879 PC coarse_pc; 5880 5881 if (nullsp) { 5882 PetscCall(MatNullSpaceRemove(nullsp,rhs)); 5883 } 5884 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5885 PetscCall(PCPreSolve(coarse_pc,pcbddc->coarse_ksp)); 5886 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol)); 5887 PetscCall(PCPostSolve(coarse_pc,pcbddc->coarse_ksp)); 5888 } else { 5889 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5890 PetscCall(KSPSolve(pcbddc->coarse_ksp,rhs,sol)); 5891 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0)); 5892 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp,pc,sol)); 5893 if (nullsp) { 5894 PetscCall(MatNullSpaceRemove(nullsp,sol)); 5895 } 5896 } 5897 } 5898 /* we don't need the benign correction at coarser levels anymore */ 5899 if (pcbddc->benign_have_null && isbddc) { 5900 PC coarse_pc; 5901 PC_BDDC* coarsepcbddc; 5902 5903 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 5904 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5905 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5906 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5907 } 5908 } 5909 5910 /* Local solution on R nodes */ 5911 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5912 PetscCall(PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose)); 5913 } 5914 /* communications from coarse sol to local primal nodes */ 5915 PetscCall(PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE)); 5916 PetscCall(PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE)); 5917 5918 /* Sum contributions from the two levels */ 5919 if (!pcbddc->benign_apply_coarse_only) { 5920 if (applytranspose) { 5921 PetscCall(MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B)); 5922 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D)); 5923 } else { 5924 PetscCall(MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B)); 5925 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D)); 5926 } 5927 /* store p0 */ 5928 if (pcbddc->benign_n) { 5929 PetscScalar *array; 5930 PetscInt j; 5931 5932 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 5933 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5934 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 5935 } 5936 } else { /* expand the coarse solution */ 5937 if (applytranspose) { 5938 PetscCall(MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B)); 5939 } else { 5940 PetscCall(MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B)); 5941 } 5942 } 5943 PetscFunctionReturn(0); 5944 } 5945 5946 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5947 { 5948 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5949 Vec from,to; 5950 const PetscScalar *array; 5951 5952 PetscFunctionBegin; 5953 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5954 from = pcbddc->coarse_vec; 5955 to = pcbddc->vec1_P; 5956 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5957 Vec tvec; 5958 5959 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&tvec)); 5960 PetscCall(VecResetArray(tvec)); 5961 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&tvec)); 5962 PetscCall(VecGetArrayRead(tvec,&array)); 5963 PetscCall(VecPlaceArray(from,array)); 5964 PetscCall(VecRestoreArrayRead(tvec,&array)); 5965 } 5966 } else { /* from local to global -> put data in coarse right hand side */ 5967 from = pcbddc->vec1_P; 5968 to = pcbddc->coarse_vec; 5969 } 5970 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode)); 5971 PetscFunctionReturn(0); 5972 } 5973 5974 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5975 { 5976 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5977 Vec from,to; 5978 const PetscScalar *array; 5979 5980 PetscFunctionBegin; 5981 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5982 from = pcbddc->coarse_vec; 5983 to = pcbddc->vec1_P; 5984 } else { /* from local to global -> put data in coarse right hand side */ 5985 from = pcbddc->vec1_P; 5986 to = pcbddc->coarse_vec; 5987 } 5988 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode)); 5989 if (smode == SCATTER_FORWARD) { 5990 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5991 Vec tvec; 5992 5993 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&tvec)); 5994 PetscCall(VecGetArrayRead(to,&array)); 5995 PetscCall(VecPlaceArray(tvec,array)); 5996 PetscCall(VecRestoreArrayRead(to,&array)); 5997 } 5998 } else { 5999 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6000 PetscCall(VecResetArray(from)); 6001 } 6002 } 6003 PetscFunctionReturn(0); 6004 } 6005 6006 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6007 { 6008 PetscErrorCode ierr; 6009 PC_IS* pcis = (PC_IS*)(pc->data); 6010 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6011 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6012 /* one and zero */ 6013 PetscScalar one=1.0,zero=0.0; 6014 /* space to store constraints and their local indices */ 6015 PetscScalar *constraints_data; 6016 PetscInt *constraints_idxs,*constraints_idxs_B; 6017 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 6018 PetscInt *constraints_n; 6019 /* iterators */ 6020 PetscInt i,j,k,total_counts,total_counts_cc,cum; 6021 /* BLAS integers */ 6022 PetscBLASInt lwork,lierr; 6023 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 6024 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 6025 /* reuse */ 6026 PetscInt olocal_primal_size,olocal_primal_size_cc; 6027 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 6028 /* change of basis */ 6029 PetscBool qr_needed; 6030 PetscBT change_basis,qr_needed_idx; 6031 /* auxiliary stuff */ 6032 PetscInt *nnz,*is_indices; 6033 PetscInt ncc; 6034 /* some quantities */ 6035 PetscInt n_vertices,total_primal_vertices,valid_constraints; 6036 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 6037 PetscReal tol; /* tolerance for retaining eigenmodes */ 6038 6039 PetscFunctionBegin; 6040 tol = PetscSqrtReal(PETSC_SMALL); 6041 /* Destroy Mat objects computed previously */ 6042 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 6043 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6044 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 6045 /* save info on constraints from previous setup (if any) */ 6046 olocal_primal_size = pcbddc->local_primal_size; 6047 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6048 PetscCall(PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult)); 6049 PetscCall(PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc)); 6050 PetscCall(PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc)); 6051 PetscCall(PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult)); 6052 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 6053 6054 if (!pcbddc->adaptive_selection) { 6055 IS ISForVertices,*ISForFaces,*ISForEdges; 6056 MatNullSpace nearnullsp; 6057 const Vec *nearnullvecs; 6058 Vec *localnearnullsp; 6059 PetscScalar *array; 6060 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 6061 PetscBool nnsp_has_cnst; 6062 /* LAPACK working arrays for SVD or POD */ 6063 PetscBool skip_lapack,boolforchange; 6064 PetscScalar *work; 6065 PetscReal *singular_vals; 6066 #if defined(PETSC_USE_COMPLEX) 6067 PetscReal *rwork; 6068 #endif 6069 PetscScalar *temp_basis = NULL,*correlation_mat = NULL; 6070 PetscBLASInt dummy_int=1; 6071 PetscScalar dummy_scalar=1.; 6072 PetscBool use_pod = PETSC_FALSE; 6073 6074 /* MKL SVD with same input gives different results on different processes! */ 6075 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 6076 use_pod = PETSC_TRUE; 6077 #endif 6078 /* Get index sets for faces, edges and vertices from graph */ 6079 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices)); 6080 /* print some info */ 6081 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6082 PetscInt nv; 6083 6084 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer)); 6085 PetscCall(ISGetSize(ISForVertices,&nv)); 6086 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6087 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 6088 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices)); 6089 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges)); 6090 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces)); 6091 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6092 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 6093 } 6094 6095 /* free unneeded index sets */ 6096 if (!pcbddc->use_vertices) { 6097 PetscCall(ISDestroy(&ISForVertices)); 6098 } 6099 if (!pcbddc->use_edges) { 6100 for (i=0;i<n_ISForEdges;i++) { 6101 PetscCall(ISDestroy(&ISForEdges[i])); 6102 } 6103 PetscCall(PetscFree(ISForEdges)); 6104 n_ISForEdges = 0; 6105 } 6106 if (!pcbddc->use_faces) { 6107 for (i=0;i<n_ISForFaces;i++) { 6108 PetscCall(ISDestroy(&ISForFaces[i])); 6109 } 6110 PetscCall(PetscFree(ISForFaces)); 6111 n_ISForFaces = 0; 6112 } 6113 6114 /* check if near null space is attached to global mat */ 6115 if (pcbddc->use_nnsp) { 6116 PetscCall(MatGetNearNullSpace(pc->pmat,&nearnullsp)); 6117 } else nearnullsp = NULL; 6118 6119 if (nearnullsp) { 6120 PetscCall(MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs)); 6121 /* remove any stored info */ 6122 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 6123 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 6124 /* store information for BDDC solver reuse */ 6125 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 6126 pcbddc->onearnullspace = nearnullsp; 6127 PetscCall(PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state)); 6128 for (i=0;i<nnsp_size;i++) { 6129 PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i])); 6130 } 6131 } else { /* if near null space is not provided BDDC uses constants by default */ 6132 nnsp_size = 0; 6133 nnsp_has_cnst = PETSC_TRUE; 6134 } 6135 /* get max number of constraints on a single cc */ 6136 max_constraints = nnsp_size; 6137 if (nnsp_has_cnst) max_constraints++; 6138 6139 /* 6140 Evaluate maximum storage size needed by the procedure 6141 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6142 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6143 There can be multiple constraints per connected component 6144 */ 6145 n_vertices = 0; 6146 if (ISForVertices) { 6147 PetscCall(ISGetSize(ISForVertices,&n_vertices)); 6148 } 6149 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 6150 PetscCall(PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n)); 6151 6152 total_counts = n_ISForFaces+n_ISForEdges; 6153 total_counts *= max_constraints; 6154 total_counts += n_vertices; 6155 PetscCall(PetscBTCreate(total_counts,&change_basis)); 6156 6157 total_counts = 0; 6158 max_size_of_constraint = 0; 6159 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 6160 IS used_is; 6161 if (i<n_ISForEdges) { 6162 used_is = ISForEdges[i]; 6163 } else { 6164 used_is = ISForFaces[i-n_ISForEdges]; 6165 } 6166 PetscCall(ISGetSize(used_is,&j)); 6167 total_counts += j; 6168 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 6169 } 6170 PetscCall(PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B)); 6171 6172 /* get local part of global near null space vectors */ 6173 PetscCall(PetscMalloc1(nnsp_size,&localnearnullsp)); 6174 for (k=0;k<nnsp_size;k++) { 6175 PetscCall(VecDuplicate(pcis->vec1_N,&localnearnullsp[k])); 6176 PetscCall(VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD)); 6177 PetscCall(VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD)); 6178 } 6179 6180 /* whether or not to skip lapack calls */ 6181 skip_lapack = PETSC_TRUE; 6182 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6183 6184 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6185 if (!skip_lapack) { 6186 PetscScalar temp_work; 6187 6188 if (use_pod) { 6189 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6190 PetscCall(PetscMalloc1(max_constraints*max_constraints,&correlation_mat)); 6191 PetscCall(PetscMalloc1(max_constraints,&singular_vals)); 6192 PetscCall(PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis)); 6193 #if defined(PETSC_USE_COMPLEX) 6194 PetscCall(PetscMalloc1(3*max_constraints,&rwork)); 6195 #endif 6196 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6197 PetscCall(PetscBLASIntCast(max_constraints,&Blas_N)); 6198 PetscCall(PetscBLASIntCast(max_constraints,&Blas_LDA)); 6199 lwork = -1; 6200 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6201 #if !defined(PETSC_USE_COMPLEX) 6202 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 6203 #else 6204 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 6205 #endif 6206 PetscCall(PetscFPTrapPop()); 6207 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 6208 } else { 6209 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6210 /* SVD */ 6211 PetscInt max_n,min_n; 6212 max_n = max_size_of_constraint; 6213 min_n = max_constraints; 6214 if (max_size_of_constraint < max_constraints) { 6215 min_n = max_size_of_constraint; 6216 max_n = max_constraints; 6217 } 6218 PetscCall(PetscMalloc1(min_n,&singular_vals)); 6219 #if defined(PETSC_USE_COMPLEX) 6220 PetscCall(PetscMalloc1(5*min_n,&rwork)); 6221 #endif 6222 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6223 lwork = -1; 6224 PetscCall(PetscBLASIntCast(max_n,&Blas_M)); 6225 PetscCall(PetscBLASIntCast(min_n,&Blas_N)); 6226 PetscCall(PetscBLASIntCast(max_n,&Blas_LDA)); 6227 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6228 #if !defined(PETSC_USE_COMPLEX) 6229 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 6230 #else 6231 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 6232 #endif 6233 PetscCall(PetscFPTrapPop()); 6234 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 6235 #else 6236 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6237 #endif /* on missing GESVD */ 6238 } 6239 /* Allocate optimal workspace */ 6240 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork)); 6241 PetscCall(PetscMalloc1(lwork,&work)); 6242 } 6243 /* Now we can loop on constraining sets */ 6244 total_counts = 0; 6245 constraints_idxs_ptr[0] = 0; 6246 constraints_data_ptr[0] = 0; 6247 /* vertices */ 6248 if (n_vertices) { 6249 PetscCall(ISGetIndices(ISForVertices,(const PetscInt**)&is_indices)); 6250 PetscCall(PetscArraycpy(constraints_idxs,is_indices,n_vertices)); 6251 for (i=0;i<n_vertices;i++) { 6252 constraints_n[total_counts] = 1; 6253 constraints_data[total_counts] = 1.0; 6254 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 6255 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 6256 total_counts++; 6257 } 6258 PetscCall(ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices)); 6259 n_vertices = total_counts; 6260 } 6261 6262 /* edges and faces */ 6263 total_counts_cc = total_counts; 6264 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6265 IS used_is; 6266 PetscBool idxs_copied = PETSC_FALSE; 6267 6268 if (ncc<n_ISForEdges) { 6269 used_is = ISForEdges[ncc]; 6270 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6271 } else { 6272 used_is = ISForFaces[ncc-n_ISForEdges]; 6273 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6274 } 6275 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6276 6277 PetscCall(ISGetSize(used_is,&size_of_constraint)); 6278 PetscCall(ISGetIndices(used_is,(const PetscInt**)&is_indices)); 6279 /* change of basis should not be performed on local periodic nodes */ 6280 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6281 if (nnsp_has_cnst) { 6282 PetscScalar quad_value; 6283 6284 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint)); 6285 idxs_copied = PETSC_TRUE; 6286 6287 if (!pcbddc->use_nnsp_true) { 6288 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6289 } else { 6290 quad_value = 1.0; 6291 } 6292 for (j=0;j<size_of_constraint;j++) { 6293 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6294 } 6295 temp_constraints++; 6296 total_counts++; 6297 } 6298 for (k=0;k<nnsp_size;k++) { 6299 PetscReal real_value; 6300 PetscScalar *ptr_to_data; 6301 6302 PetscCall(VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array)); 6303 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6304 for (j=0;j<size_of_constraint;j++) { 6305 ptr_to_data[j] = array[is_indices[j]]; 6306 } 6307 PetscCall(VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array)); 6308 /* check if array is null on the connected component */ 6309 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6310 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6311 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6312 temp_constraints++; 6313 total_counts++; 6314 if (!idxs_copied) { 6315 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint)); 6316 idxs_copied = PETSC_TRUE; 6317 } 6318 } 6319 } 6320 PetscCall(ISRestoreIndices(used_is,(const PetscInt**)&is_indices)); 6321 valid_constraints = temp_constraints; 6322 if (!pcbddc->use_nnsp_true && temp_constraints) { 6323 if (temp_constraints == 1) { /* just normalize the constraint */ 6324 PetscScalar norm,*ptr_to_data; 6325 6326 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6327 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6328 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6329 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6330 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6331 } else { /* perform SVD */ 6332 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6333 6334 if (use_pod) { 6335 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6336 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6337 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6338 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6339 from that computed using LAPACKgesvd 6340 -> This is due to a different computation of eigenvectors in LAPACKheev 6341 -> The quality of the POD-computed basis will be the same */ 6342 PetscCall(PetscArrayzero(correlation_mat,temp_constraints*temp_constraints)); 6343 /* Store upper triangular part of correlation matrix */ 6344 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6345 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6346 for (j=0;j<temp_constraints;j++) { 6347 for (k=0;k<j+1;k++) { 6348 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6349 } 6350 } 6351 /* compute eigenvalues and eigenvectors of correlation matrix */ 6352 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6353 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_LDA)); 6354 #if !defined(PETSC_USE_COMPLEX) 6355 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6356 #else 6357 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6358 #endif 6359 PetscCall(PetscFPTrapPop()); 6360 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6361 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6362 j = 0; 6363 while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++; 6364 total_counts = total_counts-j; 6365 valid_constraints = temp_constraints-j; 6366 /* scale and copy POD basis into used quadrature memory */ 6367 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6368 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6369 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_K)); 6370 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6371 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_LDB)); 6372 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDC)); 6373 if (j<temp_constraints) { 6374 PetscInt ii; 6375 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6376 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6377 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6378 PetscCall(PetscFPTrapPop()); 6379 for (k=0;k<temp_constraints-j;k++) { 6380 for (ii=0;ii<size_of_constraint;ii++) { 6381 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6382 } 6383 } 6384 } 6385 } else { 6386 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6387 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6388 PetscCall(PetscBLASIntCast(temp_constraints,&Blas_N)); 6389 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6390 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6391 #if !defined(PETSC_USE_COMPLEX) 6392 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6393 #else 6394 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6395 #endif 6396 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6397 PetscCall(PetscFPTrapPop()); 6398 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6399 k = temp_constraints; 6400 if (k > size_of_constraint) k = size_of_constraint; 6401 j = 0; 6402 while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++; 6403 valid_constraints = k-j; 6404 total_counts = total_counts-temp_constraints+valid_constraints; 6405 #else 6406 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen"); 6407 #endif /* on missing GESVD */ 6408 } 6409 } 6410 } 6411 /* update pointers information */ 6412 if (valid_constraints) { 6413 constraints_n[total_counts_cc] = valid_constraints; 6414 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6415 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6416 /* set change_of_basis flag */ 6417 if (boolforchange) { 6418 PetscBTSet(change_basis,total_counts_cc); 6419 } 6420 total_counts_cc++; 6421 } 6422 } 6423 /* free workspace */ 6424 if (!skip_lapack) { 6425 PetscCall(PetscFree(work)); 6426 #if defined(PETSC_USE_COMPLEX) 6427 PetscCall(PetscFree(rwork)); 6428 #endif 6429 PetscCall(PetscFree(singular_vals)); 6430 PetscCall(PetscFree(correlation_mat)); 6431 PetscCall(PetscFree(temp_basis)); 6432 } 6433 for (k=0;k<nnsp_size;k++) { 6434 PetscCall(VecDestroy(&localnearnullsp[k])); 6435 } 6436 PetscCall(PetscFree(localnearnullsp)); 6437 /* free index sets of faces, edges and vertices */ 6438 for (i=0;i<n_ISForFaces;i++) { 6439 PetscCall(ISDestroy(&ISForFaces[i])); 6440 } 6441 if (n_ISForFaces) { 6442 PetscCall(PetscFree(ISForFaces)); 6443 } 6444 for (i=0;i<n_ISForEdges;i++) { 6445 PetscCall(ISDestroy(&ISForEdges[i])); 6446 } 6447 if (n_ISForEdges) { 6448 PetscCall(PetscFree(ISForEdges)); 6449 } 6450 PetscCall(ISDestroy(&ISForVertices)); 6451 } else { 6452 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6453 6454 total_counts = 0; 6455 n_vertices = 0; 6456 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6457 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_vertices)); 6458 } 6459 max_constraints = 0; 6460 total_counts_cc = 0; 6461 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6462 total_counts += pcbddc->adaptive_constraints_n[i]; 6463 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6464 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6465 } 6466 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6467 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6468 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6469 constraints_data = pcbddc->adaptive_constraints_data; 6470 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6471 PetscCall(PetscMalloc1(total_counts_cc,&constraints_n)); 6472 total_counts_cc = 0; 6473 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6474 if (pcbddc->adaptive_constraints_n[i]) { 6475 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6476 } 6477 } 6478 6479 max_size_of_constraint = 0; 6480 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6481 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B)); 6482 /* Change of basis */ 6483 PetscCall(PetscBTCreate(total_counts_cc,&change_basis)); 6484 if (pcbddc->use_change_of_basis) { 6485 for (i=0;i<sub_schurs->n_subs;i++) { 6486 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6487 PetscCall(PetscBTSet(change_basis,i+n_vertices)); 6488 } 6489 } 6490 } 6491 } 6492 pcbddc->local_primal_size = total_counts; 6493 PetscCall(PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs)); 6494 6495 /* map constraints_idxs in boundary numbering */ 6496 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B)); 6497 PetscCheckFalse(i != constraints_idxs_ptr[total_counts_cc],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i); 6498 6499 /* Create constraint matrix */ 6500 PetscCall(MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix)); 6501 PetscCall(MatSetType(pcbddc->ConstraintMatrix,MATAIJ)); 6502 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n)); 6503 6504 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6505 /* determine if a QR strategy is needed for change of basis */ 6506 qr_needed = pcbddc->use_qr_single; 6507 PetscCall(PetscBTCreate(total_counts_cc,&qr_needed_idx)); 6508 total_primal_vertices=0; 6509 pcbddc->local_primal_size_cc = 0; 6510 for (i=0;i<total_counts_cc;i++) { 6511 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6512 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6513 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6514 pcbddc->local_primal_size_cc += 1; 6515 } else if (PetscBTLookup(change_basis,i)) { 6516 for (k=0;k<constraints_n[i];k++) { 6517 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6518 } 6519 pcbddc->local_primal_size_cc += constraints_n[i]; 6520 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6521 PetscBTSet(qr_needed_idx,i); 6522 qr_needed = PETSC_TRUE; 6523 } 6524 } else { 6525 pcbddc->local_primal_size_cc += 1; 6526 } 6527 } 6528 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6529 pcbddc->n_vertices = total_primal_vertices; 6530 /* permute indices in order to have a sorted set of vertices */ 6531 PetscCall(PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs)); 6532 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult)); 6533 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices)); 6534 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6535 6536 /* nonzero structure of constraint matrix */ 6537 /* and get reference dof for local constraints */ 6538 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&nnz)); 6539 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6540 6541 j = total_primal_vertices; 6542 total_counts = total_primal_vertices; 6543 cum = total_primal_vertices; 6544 for (i=n_vertices;i<total_counts_cc;i++) { 6545 if (!PetscBTLookup(change_basis,i)) { 6546 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6547 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6548 cum++; 6549 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6550 for (k=0;k<constraints_n[i];k++) { 6551 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6552 nnz[j+k] = size_of_constraint; 6553 } 6554 j += constraints_n[i]; 6555 } 6556 } 6557 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz)); 6558 PetscCall(MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 6559 PetscCall(PetscFree(nnz)); 6560 6561 /* set values in constraint matrix */ 6562 for (i=0;i<total_primal_vertices;i++) { 6563 PetscCall(MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES)); 6564 } 6565 total_counts = total_primal_vertices; 6566 for (i=n_vertices;i<total_counts_cc;i++) { 6567 if (!PetscBTLookup(change_basis,i)) { 6568 PetscInt *cols; 6569 6570 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6571 cols = constraints_idxs+constraints_idxs_ptr[i]; 6572 for (k=0;k<constraints_n[i];k++) { 6573 PetscInt row = total_counts+k; 6574 PetscScalar *vals; 6575 6576 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6577 PetscCall(MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES)); 6578 } 6579 total_counts += constraints_n[i]; 6580 } 6581 } 6582 /* assembling */ 6583 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY)); 6584 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY)); 6585 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view")); 6586 6587 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6588 if (pcbddc->use_change_of_basis) { 6589 /* dual and primal dofs on a single cc */ 6590 PetscInt dual_dofs,primal_dofs; 6591 /* working stuff for GEQRF */ 6592 PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t; 6593 PetscBLASInt lqr_work; 6594 /* working stuff for UNGQR */ 6595 PetscScalar *gqr_work = NULL,lgqr_work_t=0.0; 6596 PetscBLASInt lgqr_work; 6597 /* working stuff for TRTRS */ 6598 PetscScalar *trs_rhs = NULL; 6599 PetscBLASInt Blas_NRHS; 6600 /* pointers for values insertion into change of basis matrix */ 6601 PetscInt *start_rows,*start_cols; 6602 PetscScalar *start_vals; 6603 /* working stuff for values insertion */ 6604 PetscBT is_primal; 6605 PetscInt *aux_primal_numbering_B; 6606 /* matrix sizes */ 6607 PetscInt global_size,local_size; 6608 /* temporary change of basis */ 6609 Mat localChangeOfBasisMatrix; 6610 /* extra space for debugging */ 6611 PetscScalar *dbg_work = NULL; 6612 6613 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6614 PetscCall(MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix)); 6615 PetscCall(MatSetType(localChangeOfBasisMatrix,MATAIJ)); 6616 PetscCall(MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n)); 6617 /* nonzeros for local mat */ 6618 PetscCall(PetscMalloc1(pcis->n,&nnz)); 6619 if (!pcbddc->benign_change || pcbddc->fake_change) { 6620 for (i=0;i<pcis->n;i++) nnz[i]=1; 6621 } else { 6622 const PetscInt *ii; 6623 PetscInt n; 6624 PetscBool flg_row; 6625 PetscCall(MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row)); 6626 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6627 PetscCall(MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row)); 6628 } 6629 for (i=n_vertices;i<total_counts_cc;i++) { 6630 if (PetscBTLookup(change_basis,i)) { 6631 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6632 if (PetscBTLookup(qr_needed_idx,i)) { 6633 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6634 } else { 6635 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6636 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6637 } 6638 } 6639 } 6640 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz)); 6641 PetscCall(MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 6642 PetscCall(PetscFree(nnz)); 6643 /* Set interior change in the matrix */ 6644 if (!pcbddc->benign_change || pcbddc->fake_change) { 6645 for (i=0;i<pcis->n;i++) { 6646 PetscCall(MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES)); 6647 } 6648 } else { 6649 const PetscInt *ii,*jj; 6650 PetscScalar *aa; 6651 PetscInt n; 6652 PetscBool flg_row; 6653 PetscCall(MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row)); 6654 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change,&aa)); 6655 for (i=0;i<n;i++) { 6656 PetscCall(MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES)); 6657 } 6658 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change,&aa)); 6659 PetscCall(MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row)); 6660 } 6661 6662 if (pcbddc->dbg_flag) { 6663 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 6664 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank)); 6665 } 6666 6667 /* Now we loop on the constraints which need a change of basis */ 6668 /* 6669 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6670 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6671 6672 Basic blocks of change of basis matrix T computed by 6673 6674 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6675 6676 | 1 0 ... 0 s_1/S | 6677 | 0 1 ... 0 s_2/S | 6678 | ... | 6679 | 0 ... 1 s_{n-1}/S | 6680 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6681 6682 with S = \sum_{i=1}^n s_i^2 6683 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6684 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6685 6686 - QR decomposition of constraints otherwise 6687 */ 6688 if (qr_needed && max_size_of_constraint) { 6689 /* space to store Q */ 6690 PetscCall(PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis)); 6691 /* array to store scaling factors for reflectors */ 6692 PetscCall(PetscMalloc1(max_constraints,&qr_tau)); 6693 /* first we issue queries for optimal work */ 6694 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_M)); 6695 PetscCall(PetscBLASIntCast(max_constraints,&Blas_N)); 6696 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_LDA)); 6697 lqr_work = -1; 6698 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6699 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6700 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work)); 6701 PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work)); 6702 lgqr_work = -1; 6703 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_M)); 6704 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_N)); 6705 PetscCall(PetscBLASIntCast(max_constraints,&Blas_K)); 6706 PetscCall(PetscBLASIntCast(max_size_of_constraint,&Blas_LDA)); 6707 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6708 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6709 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6710 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work)); 6711 PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work)); 6712 /* array to store rhs and solution of triangular solver */ 6713 PetscCall(PetscMalloc1(max_constraints*max_constraints,&trs_rhs)); 6714 /* allocating workspace for check */ 6715 if (pcbddc->dbg_flag) { 6716 PetscCall(PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work)); 6717 } 6718 } 6719 /* array to store whether a node is primal or not */ 6720 PetscCall(PetscBTCreate(pcis->n_B,&is_primal)); 6721 PetscCall(PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B)); 6722 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B)); 6723 PetscCheckFalse(i != total_primal_vertices,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i); 6724 for (i=0;i<total_primal_vertices;i++) { 6725 PetscCall(PetscBTSet(is_primal,aux_primal_numbering_B[i])); 6726 } 6727 PetscCall(PetscFree(aux_primal_numbering_B)); 6728 6729 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6730 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6731 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6732 if (PetscBTLookup(change_basis,total_counts)) { 6733 /* get constraint info */ 6734 primal_dofs = constraints_n[total_counts]; 6735 dual_dofs = size_of_constraint-primal_dofs; 6736 6737 if (pcbddc->dbg_flag) { 6738 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint)); 6739 } 6740 6741 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6742 6743 /* copy quadrature constraints for change of basis check */ 6744 if (pcbddc->dbg_flag) { 6745 PetscCall(PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6746 } 6747 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6748 PetscCall(PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6749 6750 /* compute QR decomposition of constraints */ 6751 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6752 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6753 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6754 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6755 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6756 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6757 PetscCall(PetscFPTrapPop()); 6758 6759 /* explicitly compute R^-T */ 6760 PetscCall(PetscArrayzero(trs_rhs,primal_dofs*primal_dofs)); 6761 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6762 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6763 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_NRHS)); 6764 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6765 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDB)); 6766 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6767 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6768 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6769 PetscCall(PetscFPTrapPop()); 6770 6771 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 6772 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6773 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6774 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_K)); 6775 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6776 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6777 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6778 PetscCheck(!lierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6779 PetscCall(PetscFPTrapPop()); 6780 6781 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6782 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6783 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6784 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_M)); 6785 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_N)); 6786 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_K)); 6787 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6788 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDB)); 6789 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDC)); 6790 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6791 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6792 PetscCall(PetscFPTrapPop()); 6793 PetscCall(PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs)); 6794 6795 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6796 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6797 /* insert cols for primal dofs */ 6798 for (j=0;j<primal_dofs;j++) { 6799 start_vals = &qr_basis[j*size_of_constraint]; 6800 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6801 PetscCall(MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES)); 6802 } 6803 /* insert cols for dual dofs */ 6804 for (j=0,k=0;j<dual_dofs;k++) { 6805 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6806 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6807 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6808 PetscCall(MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES)); 6809 j++; 6810 } 6811 } 6812 6813 /* check change of basis */ 6814 if (pcbddc->dbg_flag) { 6815 PetscInt ii,jj; 6816 PetscBool valid_qr=PETSC_TRUE; 6817 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_M)); 6818 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6819 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_K)); 6820 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDA)); 6821 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_LDB)); 6822 PetscCall(PetscBLASIntCast(primal_dofs,&Blas_LDC)); 6823 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6824 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6825 PetscCall(PetscFPTrapPop()); 6826 for (jj=0;jj<size_of_constraint;jj++) { 6827 for (ii=0;ii<primal_dofs;ii++) { 6828 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6829 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6830 } 6831 } 6832 if (!valid_qr) { 6833 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n")); 6834 for (jj=0;jj<size_of_constraint;jj++) { 6835 for (ii=0;ii<primal_dofs;ii++) { 6836 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6837 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]))); 6838 } 6839 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6840 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]))); 6841 } 6842 } 6843 } 6844 } else { 6845 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n")); 6846 } 6847 } 6848 } else { /* simple transformation block */ 6849 PetscInt row,col; 6850 PetscScalar val,norm; 6851 6852 PetscCall(PetscBLASIntCast(size_of_constraint,&Blas_N)); 6853 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6854 for (j=0;j<size_of_constraint;j++) { 6855 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6856 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6857 if (!PetscBTLookup(is_primal,row_B)) { 6858 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6859 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES)); 6860 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES)); 6861 } else { 6862 for (k=0;k<size_of_constraint;k++) { 6863 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6864 if (row != col) { 6865 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6866 } else { 6867 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6868 } 6869 PetscCall(MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES)); 6870 } 6871 } 6872 } 6873 if (pcbddc->dbg_flag) { 6874 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n")); 6875 } 6876 } 6877 } else { 6878 if (pcbddc->dbg_flag) { 6879 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint)); 6880 } 6881 } 6882 } 6883 6884 /* free workspace */ 6885 if (qr_needed) { 6886 if (pcbddc->dbg_flag) { 6887 PetscCall(PetscFree(dbg_work)); 6888 } 6889 PetscCall(PetscFree(trs_rhs)); 6890 PetscCall(PetscFree(qr_tau)); 6891 PetscCall(PetscFree(qr_work)); 6892 PetscCall(PetscFree(gqr_work)); 6893 PetscCall(PetscFree(qr_basis)); 6894 } 6895 PetscCall(PetscBTDestroy(&is_primal)); 6896 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY)); 6897 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY)); 6898 6899 /* assembling of global change of variable */ 6900 if (!pcbddc->fake_change) { 6901 Mat tmat; 6902 PetscInt bs; 6903 6904 PetscCall(VecGetSize(pcis->vec1_global,&global_size)); 6905 PetscCall(VecGetLocalSize(pcis->vec1_global,&local_size)); 6906 PetscCall(MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat)); 6907 PetscCall(MatISSetLocalMat(tmat,localChangeOfBasisMatrix)); 6908 PetscCall(MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY)); 6909 PetscCall(MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY)); 6910 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix)); 6911 PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ)); 6912 PetscCall(MatGetBlockSize(pc->pmat,&bs)); 6913 PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs)); 6914 PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size)); 6915 PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE)); 6916 PetscCall(MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix)); 6917 PetscCall(MatDestroy(&tmat)); 6918 PetscCall(VecSet(pcis->vec1_global,0.0)); 6919 PetscCall(VecSet(pcis->vec1_N,1.0)); 6920 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 6921 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 6922 PetscCall(VecReciprocal(pcis->vec1_global)); 6923 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL)); 6924 6925 /* check */ 6926 if (pcbddc->dbg_flag) { 6927 PetscReal error; 6928 Vec x,x_change; 6929 6930 PetscCall(VecDuplicate(pcis->vec1_global,&x)); 6931 PetscCall(VecDuplicate(pcis->vec1_global,&x_change)); 6932 PetscCall(VecSetRandom(x,NULL)); 6933 PetscCall(VecCopy(x,pcis->vec1_global)); 6934 PetscCall(VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 6935 PetscCall(VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 6936 PetscCall(MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N)); 6937 PetscCall(VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE)); 6938 PetscCall(VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE)); 6939 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change)); 6940 PetscCall(VecAXPY(x,-1.0,x_change)); 6941 PetscCall(VecNorm(x,NORM_INFINITY,&error)); 6942 if (error > PETSC_SMALL) { 6943 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 6944 } 6945 PetscCall(VecDestroy(&x)); 6946 PetscCall(VecDestroy(&x_change)); 6947 } 6948 /* adapt sub_schurs computed (if any) */ 6949 if (pcbddc->use_deluxe_scaling) { 6950 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6951 6952 PetscCheckFalse(pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined,PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6953 if (sub_schurs && sub_schurs->S_Ej_all) { 6954 Mat S_new,tmat; 6955 IS is_all_N,is_V_Sall = NULL; 6956 6957 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N)); 6958 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat)); 6959 if (pcbddc->deluxe_zerorows) { 6960 ISLocalToGlobalMapping NtoSall; 6961 IS is_V; 6962 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V)); 6963 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall)); 6964 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall)); 6965 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 6966 PetscCall(ISDestroy(&is_V)); 6967 } 6968 PetscCall(ISDestroy(&is_all_N)); 6969 PetscCall(MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new)); 6970 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 6971 PetscCall(PetscObjectReference((PetscObject)S_new)); 6972 if (pcbddc->deluxe_zerorows) { 6973 const PetscScalar *array; 6974 const PetscInt *idxs_V,*idxs_all; 6975 PetscInt i,n_V; 6976 6977 PetscCall(MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL)); 6978 PetscCall(ISGetLocalSize(is_V_Sall,&n_V)); 6979 PetscCall(ISGetIndices(is_V_Sall,&idxs_V)); 6980 PetscCall(ISGetIndices(sub_schurs->is_Ej_all,&idxs_all)); 6981 PetscCall(VecGetArrayRead(pcis->D,&array)); 6982 for (i=0;i<n_V;i++) { 6983 PetscScalar val; 6984 PetscInt idx; 6985 6986 idx = idxs_V[i]; 6987 val = array[idxs_all[idxs_V[i]]]; 6988 PetscCall(MatSetValue(S_new,idx,idx,val,INSERT_VALUES)); 6989 } 6990 PetscCall(MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY)); 6991 PetscCall(MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY)); 6992 PetscCall(VecRestoreArrayRead(pcis->D,&array)); 6993 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all)); 6994 PetscCall(ISRestoreIndices(is_V_Sall,&idxs_V)); 6995 } 6996 sub_schurs->S_Ej_all = S_new; 6997 PetscCall(MatDestroy(&S_new)); 6998 if (sub_schurs->sum_S_Ej_all) { 6999 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new)); 7000 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 7001 PetscCall(PetscObjectReference((PetscObject)S_new)); 7002 if (pcbddc->deluxe_zerorows) { 7003 PetscCall(MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL)); 7004 } 7005 sub_schurs->sum_S_Ej_all = S_new; 7006 PetscCall(MatDestroy(&S_new)); 7007 } 7008 PetscCall(ISDestroy(&is_V_Sall)); 7009 PetscCall(MatDestroy(&tmat)); 7010 } 7011 /* destroy any change of basis context in sub_schurs */ 7012 if (sub_schurs && sub_schurs->change) { 7013 PetscInt i; 7014 7015 for (i=0;i<sub_schurs->n_subs;i++) { 7016 PetscCall(KSPDestroy(&sub_schurs->change[i])); 7017 } 7018 PetscCall(PetscFree(sub_schurs->change)); 7019 } 7020 } 7021 if (pcbddc->switch_static) { /* need to save the local change */ 7022 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7023 } else { 7024 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 7025 } 7026 /* determine if any process has changed the pressures locally */ 7027 pcbddc->change_interior = pcbddc->benign_have_null; 7028 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7029 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 7030 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7031 pcbddc->use_qr_single = qr_needed; 7032 } 7033 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7034 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7035 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 7036 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7037 } else { 7038 Mat benign_global = NULL; 7039 if (pcbddc->benign_have_null) { 7040 Mat M; 7041 7042 pcbddc->change_interior = PETSC_TRUE; 7043 PetscCall(VecCopy(matis->counter,pcis->vec1_N)); 7044 PetscCall(VecReciprocal(pcis->vec1_N)); 7045 PetscCall(MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global)); 7046 if (pcbddc->benign_change) { 7047 PetscCall(MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M)); 7048 PetscCall(MatDiagonalScale(M,pcis->vec1_N,NULL)); 7049 } else { 7050 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M)); 7051 PetscCall(MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES)); 7052 } 7053 PetscCall(MatISSetLocalMat(benign_global,M)); 7054 PetscCall(MatDestroy(&M)); 7055 PetscCall(MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY)); 7056 PetscCall(MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY)); 7057 } 7058 if (pcbddc->user_ChangeOfBasisMatrix) { 7059 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix)); 7060 PetscCall(MatDestroy(&benign_global)); 7061 } else if (pcbddc->benign_have_null) { 7062 pcbddc->ChangeOfBasisMatrix = benign_global; 7063 } 7064 } 7065 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7066 IS is_global; 7067 const PetscInt *gidxs; 7068 7069 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping,&gidxs)); 7070 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global)); 7071 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping,&gidxs)); 7072 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change)); 7073 PetscCall(ISDestroy(&is_global)); 7074 } 7075 } 7076 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 7077 PetscCall(VecDuplicate(pcis->vec1_global,&pcbddc->work_change)); 7078 } 7079 7080 if (!pcbddc->fake_change) { 7081 /* add pressure dofs to set of primal nodes for numbering purposes */ 7082 for (i=0;i<pcbddc->benign_n;i++) { 7083 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7084 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7085 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7086 pcbddc->local_primal_size_cc++; 7087 pcbddc->local_primal_size++; 7088 } 7089 7090 /* check if a new primal space has been introduced (also take into account benign trick) */ 7091 pcbddc->new_primal_space_local = PETSC_TRUE; 7092 if (olocal_primal_size == pcbddc->local_primal_size) { 7093 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local)); 7094 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7095 if (!pcbddc->new_primal_space_local) { 7096 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local)); 7097 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7098 } 7099 } 7100 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7101 PetscCallMPI(MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 7102 } 7103 PetscCall(PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult)); 7104 7105 /* flush dbg viewer */ 7106 if (pcbddc->dbg_flag) { 7107 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7108 } 7109 7110 /* free workspace */ 7111 PetscCall(PetscBTDestroy(&qr_needed_idx)); 7112 PetscCall(PetscBTDestroy(&change_basis)); 7113 if (!pcbddc->adaptive_selection) { 7114 PetscCall(PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n)); 7115 PetscCall(PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B)); 7116 } else { 7117 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 7118 pcbddc->adaptive_constraints_idxs_ptr, 7119 pcbddc->adaptive_constraints_data_ptr, 7120 pcbddc->adaptive_constraints_idxs, 7121 pcbddc->adaptive_constraints_data);PetscCall(ierr); 7122 PetscCall(PetscFree(constraints_n)); 7123 PetscCall(PetscFree(constraints_idxs_B)); 7124 } 7125 PetscFunctionReturn(0); 7126 } 7127 7128 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7129 { 7130 ISLocalToGlobalMapping map; 7131 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7132 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 7133 PetscInt i,N; 7134 PetscBool rcsr = PETSC_FALSE; 7135 7136 PetscFunctionBegin; 7137 if (pcbddc->recompute_topography) { 7138 pcbddc->graphanalyzed = PETSC_FALSE; 7139 /* Reset previously computed graph */ 7140 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 7141 /* Init local Graph struct */ 7142 PetscCall(MatGetSize(pc->pmat,&N,NULL)); 7143 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat,&map,NULL)); 7144 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount)); 7145 7146 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 7147 PetscCall(PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local)); 7148 } 7149 /* Check validity of the csr graph passed in by the user */ 7150 PetscCheckFalse(pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 7151 7152 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7153 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7154 PetscInt *xadj,*adjncy; 7155 PetscInt nvtxs; 7156 PetscBool flg_row=PETSC_FALSE; 7157 7158 PetscCall(MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 7159 if (flg_row) { 7160 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES)); 7161 pcbddc->computed_rowadj = PETSC_TRUE; 7162 } 7163 PetscCall(MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row)); 7164 rcsr = PETSC_TRUE; 7165 } 7166 if (pcbddc->dbg_flag) { 7167 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7168 } 7169 7170 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7171 PetscReal *lcoords; 7172 PetscInt n; 7173 MPI_Datatype dimrealtype; 7174 7175 /* TODO: support for blocked */ 7176 PetscCheckFalse(pcbddc->mat_graph->cnloc != pc->pmat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 7177 PetscCall(MatGetLocalSize(matis->A,&n,NULL)); 7178 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords)); 7179 PetscCallMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype)); 7180 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 7181 PetscCall(PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE)); 7182 PetscCall(PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE)); 7183 PetscCallMPI(MPI_Type_free(&dimrealtype)); 7184 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 7185 7186 pcbddc->mat_graph->coords = lcoords; 7187 pcbddc->mat_graph->cloc = PETSC_TRUE; 7188 pcbddc->mat_graph->cnloc = n; 7189 } 7190 PetscCheckFalse(pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs,PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 7191 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 7192 7193 /* Setup of Graph */ 7194 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 7195 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local)); 7196 7197 /* attach info on disconnected subdomains if present */ 7198 if (pcbddc->n_local_subs) { 7199 PetscInt *local_subs,n,totn; 7200 7201 PetscCall(MatGetLocalSize(matis->A,&n,NULL)); 7202 PetscCall(PetscMalloc1(n,&local_subs)); 7203 for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs; 7204 for (i=0;i<pcbddc->n_local_subs;i++) { 7205 const PetscInt *idxs; 7206 PetscInt nl,j; 7207 7208 PetscCall(ISGetLocalSize(pcbddc->local_subs[i],&nl)); 7209 PetscCall(ISGetIndices(pcbddc->local_subs[i],&idxs)); 7210 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 7211 PetscCall(ISRestoreIndices(pcbddc->local_subs[i],&idxs)); 7212 } 7213 for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]); 7214 pcbddc->mat_graph->n_local_subs = totn + 1; 7215 pcbddc->mat_graph->local_subs = local_subs; 7216 } 7217 } 7218 7219 if (!pcbddc->graphanalyzed) { 7220 /* Graph's connected components analysis */ 7221 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 7222 pcbddc->graphanalyzed = PETSC_TRUE; 7223 pcbddc->corner_selected = pcbddc->corner_selection; 7224 } 7225 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7226 PetscFunctionReturn(0); 7227 } 7228 7229 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7230 { 7231 PetscInt i,j,n; 7232 PetscScalar *alphas; 7233 PetscReal norm,*onorms; 7234 7235 PetscFunctionBegin; 7236 n = *nio; 7237 if (!n) PetscFunctionReturn(0); 7238 PetscCall(PetscMalloc2(n,&alphas,n,&onorms)); 7239 PetscCall(VecNormalize(vecs[0],&norm)); 7240 if (norm < PETSC_SMALL) { 7241 onorms[0] = 0.0; 7242 PetscCall(VecSet(vecs[0],0.0)); 7243 } else { 7244 onorms[0] = norm; 7245 } 7246 7247 for (i=1;i<n;i++) { 7248 PetscCall(VecMDot(vecs[i],i,vecs,alphas)); 7249 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 7250 PetscCall(VecMAXPY(vecs[i],i,alphas,vecs)); 7251 PetscCall(VecNormalize(vecs[i],&norm)); 7252 if (norm < PETSC_SMALL) { 7253 onorms[i] = 0.0; 7254 PetscCall(VecSet(vecs[i],0.0)); 7255 } else { 7256 onorms[i] = norm; 7257 } 7258 } 7259 /* push nonzero vectors at the beginning */ 7260 for (i=0;i<n;i++) { 7261 if (onorms[i] == 0.0) { 7262 for (j=i+1;j<n;j++) { 7263 if (onorms[j] != 0.0) { 7264 PetscCall(VecCopy(vecs[j],vecs[i])); 7265 onorms[j] = 0.0; 7266 } 7267 } 7268 } 7269 } 7270 for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7271 PetscCall(PetscFree2(alphas,onorms)); 7272 PetscFunctionReturn(0); 7273 } 7274 7275 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7276 { 7277 ISLocalToGlobalMapping mapping; 7278 Mat A; 7279 PetscInt n_neighs,*neighs,*n_shared,**shared; 7280 PetscMPIInt size,rank,color; 7281 PetscInt *xadj,*adjncy; 7282 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7283 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7284 PetscInt void_procs,*procs_candidates = NULL; 7285 PetscInt xadj_count,*count; 7286 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7287 PetscSubcomm psubcomm; 7288 MPI_Comm subcomm; 7289 7290 PetscFunctionBegin; 7291 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7292 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis)); 7293 PetscCheck(ismatis,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7294 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7295 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7296 PetscCheckFalse(*n_subdomains <=0,PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains); 7297 7298 if (have_void) *have_void = PETSC_FALSE; 7299 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size)); 7300 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 7301 PetscCall(MatISGetLocalMat(mat,&A)); 7302 PetscCall(MatGetLocalSize(A,&n,NULL)); 7303 im_active = !!n; 7304 PetscCallMPI(MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat))); 7305 void_procs = size - active_procs; 7306 /* get ranks of of non-active processes in mat communicator */ 7307 if (void_procs) { 7308 PetscInt ncand; 7309 7310 if (have_void) *have_void = PETSC_TRUE; 7311 PetscCall(PetscMalloc1(size,&procs_candidates)); 7312 PetscCallMPI(MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 7313 for (i=0,ncand=0;i<size;i++) { 7314 if (!procs_candidates[i]) { 7315 procs_candidates[ncand++] = i; 7316 } 7317 } 7318 /* force n_subdomains to be not greater that the number of non-active processes */ 7319 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7320 } 7321 7322 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7323 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7324 PetscCall(MatGetSize(mat,&N,NULL)); 7325 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7326 PetscInt issize,isidx,dest; 7327 if (*n_subdomains == 1) dest = 0; 7328 else dest = rank; 7329 if (im_active) { 7330 issize = 1; 7331 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7332 isidx = procs_candidates[dest]; 7333 } else { 7334 isidx = dest; 7335 } 7336 } else { 7337 issize = 0; 7338 isidx = -1; 7339 } 7340 if (*n_subdomains != 1) *n_subdomains = active_procs; 7341 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends)); 7342 PetscCall(PetscFree(procs_candidates)); 7343 PetscFunctionReturn(0); 7344 } 7345 PetscCall(PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL)); 7346 PetscCall(PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL)); 7347 threshold = PetscMax(threshold,2); 7348 7349 /* Get info on mapping */ 7350 PetscCall(MatISGetLocalToGlobalMapping(mat,&mapping,NULL)); 7351 PetscCall(ISLocalToGlobalMappingGetInfo(mapping,&n_neighs,&neighs,&n_shared,&shared)); 7352 7353 /* build local CSR graph of subdomains' connectivity */ 7354 PetscCall(PetscMalloc1(2,&xadj)); 7355 xadj[0] = 0; 7356 xadj[1] = PetscMax(n_neighs-1,0); 7357 PetscCall(PetscMalloc1(xadj[1],&adjncy)); 7358 PetscCall(PetscMalloc1(xadj[1],&adjncy_wgt)); 7359 PetscCall(PetscCalloc1(n,&count)); 7360 for (i=1;i<n_neighs;i++) 7361 for (j=0;j<n_shared[i];j++) 7362 count[shared[i][j]] += 1; 7363 7364 xadj_count = 0; 7365 for (i=1;i<n_neighs;i++) { 7366 for (j=0;j<n_shared[i];j++) { 7367 if (count[shared[i][j]] < threshold) { 7368 adjncy[xadj_count] = neighs[i]; 7369 adjncy_wgt[xadj_count] = n_shared[i]; 7370 xadj_count++; 7371 break; 7372 } 7373 } 7374 } 7375 xadj[1] = xadj_count; 7376 PetscCall(PetscFree(count)); 7377 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping,&n_neighs,&neighs,&n_shared,&shared)); 7378 PetscCall(PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt)); 7379 7380 PetscCall(PetscMalloc1(1,&ranks_send_to_idx)); 7381 7382 /* Restrict work on active processes only */ 7383 PetscCall(PetscMPIIntCast(im_active,&color)); 7384 if (void_procs) { 7385 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm)); 7386 PetscCall(PetscSubcommSetNumber(psubcomm,2)); /* 2 groups, active process and not active processes */ 7387 PetscCall(PetscSubcommSetTypeGeneral(psubcomm,color,rank)); 7388 subcomm = PetscSubcommChild(psubcomm); 7389 } else { 7390 psubcomm = NULL; 7391 subcomm = PetscObjectComm((PetscObject)mat); 7392 } 7393 7394 v_wgt = NULL; 7395 if (!color) { 7396 PetscCall(PetscFree(xadj)); 7397 PetscCall(PetscFree(adjncy)); 7398 PetscCall(PetscFree(adjncy_wgt)); 7399 } else { 7400 Mat subdomain_adj; 7401 IS new_ranks,new_ranks_contig; 7402 MatPartitioning partitioner; 7403 PetscInt rstart=0,rend=0; 7404 PetscInt *is_indices,*oldranks; 7405 PetscMPIInt size; 7406 PetscBool aggregate; 7407 7408 PetscCallMPI(MPI_Comm_size(subcomm,&size)); 7409 if (void_procs) { 7410 PetscInt prank = rank; 7411 PetscCall(PetscMalloc1(size,&oldranks)); 7412 PetscCallMPI(MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm)); 7413 for (i=0;i<xadj[1];i++) { 7414 PetscCall(PetscFindInt(adjncy[i],size,oldranks,&adjncy[i])); 7415 } 7416 PetscCall(PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt)); 7417 } else { 7418 oldranks = NULL; 7419 } 7420 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7421 if (aggregate) { /* TODO: all this part could be made more efficient */ 7422 PetscInt lrows,row,ncols,*cols; 7423 PetscMPIInt nrank; 7424 PetscScalar *vals; 7425 7426 PetscCallMPI(MPI_Comm_rank(subcomm,&nrank)); 7427 lrows = 0; 7428 if (nrank<redprocs) { 7429 lrows = size/redprocs; 7430 if (nrank<size%redprocs) lrows++; 7431 } 7432 PetscCall(MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj)); 7433 PetscCall(MatGetOwnershipRange(subdomain_adj,&rstart,&rend)); 7434 PetscCall(MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 7435 PetscCall(MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 7436 row = nrank; 7437 ncols = xadj[1]-xadj[0]; 7438 cols = adjncy; 7439 PetscCall(PetscMalloc1(ncols,&vals)); 7440 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7441 PetscCall(MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES)); 7442 PetscCall(MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY)); 7443 PetscCall(MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY)); 7444 PetscCall(PetscFree(xadj)); 7445 PetscCall(PetscFree(adjncy)); 7446 PetscCall(PetscFree(adjncy_wgt)); 7447 PetscCall(PetscFree(vals)); 7448 if (use_vwgt) { 7449 Vec v; 7450 const PetscScalar *array; 7451 PetscInt nl; 7452 7453 PetscCall(MatCreateVecs(subdomain_adj,&v,NULL)); 7454 PetscCall(VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES)); 7455 PetscCall(VecAssemblyBegin(v)); 7456 PetscCall(VecAssemblyEnd(v)); 7457 PetscCall(VecGetLocalSize(v,&nl)); 7458 PetscCall(VecGetArrayRead(v,&array)); 7459 PetscCall(PetscMalloc1(nl,&v_wgt)); 7460 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7461 PetscCall(VecRestoreArrayRead(v,&array)); 7462 PetscCall(VecDestroy(&v)); 7463 } 7464 } else { 7465 PetscCall(MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj)); 7466 if (use_vwgt) { 7467 PetscCall(PetscMalloc1(1,&v_wgt)); 7468 v_wgt[0] = n; 7469 } 7470 } 7471 /* PetscCall(MatView(subdomain_adj,0)); */ 7472 7473 /* Partition */ 7474 PetscCall(MatPartitioningCreate(subcomm,&partitioner)); 7475 #if defined(PETSC_HAVE_PTSCOTCH) 7476 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH)); 7477 #elif defined(PETSC_HAVE_PARMETIS) 7478 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS)); 7479 #else 7480 PetscCall(MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE)); 7481 #endif 7482 PetscCall(MatPartitioningSetAdjacency(partitioner,subdomain_adj)); 7483 if (v_wgt) { 7484 PetscCall(MatPartitioningSetVertexWeights(partitioner,v_wgt)); 7485 } 7486 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7487 PetscCall(MatPartitioningSetNParts(partitioner,*n_subdomains)); 7488 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7489 PetscCall(MatPartitioningApply(partitioner,&new_ranks)); 7490 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7491 7492 /* renumber new_ranks to avoid "holes" in new set of processors */ 7493 PetscCall(ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig)); 7494 PetscCall(ISDestroy(&new_ranks)); 7495 PetscCall(ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices)); 7496 if (!aggregate) { 7497 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7498 PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7499 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7500 } else if (oldranks) { 7501 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7502 } else { 7503 ranks_send_to_idx[0] = is_indices[0]; 7504 } 7505 } else { 7506 PetscInt idx = 0; 7507 PetscMPIInt tag; 7508 MPI_Request *reqs; 7509 7510 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag)); 7511 PetscCall(PetscMalloc1(rend-rstart,&reqs)); 7512 for (i=rstart;i<rend;i++) { 7513 PetscCallMPI(MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart])); 7514 } 7515 PetscCallMPI(MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE)); 7516 PetscCallMPI(MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE)); 7517 PetscCall(PetscFree(reqs)); 7518 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7519 PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7520 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7521 } else if (oldranks) { 7522 ranks_send_to_idx[0] = oldranks[idx]; 7523 } else { 7524 ranks_send_to_idx[0] = idx; 7525 } 7526 } 7527 PetscCall(ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices)); 7528 /* clean up */ 7529 PetscCall(PetscFree(oldranks)); 7530 PetscCall(ISDestroy(&new_ranks_contig)); 7531 PetscCall(MatDestroy(&subdomain_adj)); 7532 PetscCall(MatPartitioningDestroy(&partitioner)); 7533 } 7534 PetscCall(PetscSubcommDestroy(&psubcomm)); 7535 PetscCall(PetscFree(procs_candidates)); 7536 7537 /* assemble parallel IS for sends */ 7538 i = 1; 7539 if (!color) i=0; 7540 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends)); 7541 PetscFunctionReturn(0); 7542 } 7543 7544 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7545 7546 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7547 { 7548 Mat local_mat; 7549 IS is_sends_internal; 7550 PetscInt rows,cols,new_local_rows; 7551 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7552 PetscBool ismatis,isdense,newisdense,destroy_mat; 7553 ISLocalToGlobalMapping l2gmap; 7554 PetscInt* l2gmap_indices; 7555 const PetscInt* is_indices; 7556 MatType new_local_type; 7557 /* buffers */ 7558 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7559 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7560 PetscInt *recv_buffer_idxs_local; 7561 PetscScalar *ptr_vals,*recv_buffer_vals; 7562 const PetscScalar *send_buffer_vals; 7563 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7564 /* MPI */ 7565 MPI_Comm comm,comm_n; 7566 PetscSubcomm subcomm; 7567 PetscMPIInt n_sends,n_recvs,size; 7568 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7569 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7570 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7571 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7572 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7573 7574 PetscFunctionBegin; 7575 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7576 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis)); 7577 PetscCheck(ismatis,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7578 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7579 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7580 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7581 PetscValidLogicalCollectiveBool(mat,reuse,6); 7582 PetscValidLogicalCollectiveInt(mat,nis,8); 7583 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7584 if (nvecs) { 7585 PetscCheckFalse(nvecs > 1,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7586 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7587 } 7588 /* further checks */ 7589 PetscCall(MatISGetLocalMat(mat,&local_mat)); 7590 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense)); 7591 PetscCheck(isdense,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7592 PetscCall(MatGetSize(local_mat,&rows,&cols)); 7593 PetscCheck(rows == cols,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7594 if (reuse && *mat_n) { 7595 PetscInt mrows,mcols,mnrows,mncols; 7596 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7597 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis)); 7598 PetscCheck(ismatis,PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7599 PetscCall(MatGetSize(mat,&mrows,&mcols)); 7600 PetscCall(MatGetSize(*mat_n,&mnrows,&mncols)); 7601 PetscCheck(mrows == mnrows,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7602 PetscCheck(mcols == mncols,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7603 } 7604 PetscCall(MatGetBlockSize(local_mat,&bs)); 7605 PetscValidLogicalCollectiveInt(mat,bs,1); 7606 7607 /* prepare IS for sending if not provided */ 7608 if (!is_sends) { 7609 PetscCheck(n_subdomains,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7610 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL)); 7611 } else { 7612 PetscCall(PetscObjectReference((PetscObject)is_sends)); 7613 is_sends_internal = is_sends; 7614 } 7615 7616 /* get comm */ 7617 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 7618 7619 /* compute number of sends */ 7620 PetscCall(ISGetLocalSize(is_sends_internal,&i)); 7621 PetscCall(PetscMPIIntCast(i,&n_sends)); 7622 7623 /* compute number of receives */ 7624 PetscCallMPI(MPI_Comm_size(comm,&size)); 7625 PetscCall(PetscMalloc1(size,&iflags)); 7626 PetscCall(PetscArrayzero(iflags,size)); 7627 PetscCall(ISGetIndices(is_sends_internal,&is_indices)); 7628 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7629 PetscCall(PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs)); 7630 PetscCall(PetscFree(iflags)); 7631 7632 /* restrict comm if requested */ 7633 subcomm = NULL; 7634 destroy_mat = PETSC_FALSE; 7635 if (restrict_comm) { 7636 PetscMPIInt color,subcommsize; 7637 7638 color = 0; 7639 if (restrict_full) { 7640 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7641 } else { 7642 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7643 } 7644 PetscCallMPI(MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm)); 7645 subcommsize = size - subcommsize; 7646 /* check if reuse has been requested */ 7647 if (reuse) { 7648 if (*mat_n) { 7649 PetscMPIInt subcommsize2; 7650 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2)); 7651 PetscCheck(subcommsize == subcommsize2,PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7652 comm_n = PetscObjectComm((PetscObject)*mat_n); 7653 } else { 7654 comm_n = PETSC_COMM_SELF; 7655 } 7656 } else { /* MAT_INITIAL_MATRIX */ 7657 PetscMPIInt rank; 7658 7659 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 7660 PetscCall(PetscSubcommCreate(comm,&subcomm)); 7661 PetscCall(PetscSubcommSetNumber(subcomm,2)); 7662 PetscCall(PetscSubcommSetTypeGeneral(subcomm,color,rank)); 7663 comm_n = PetscSubcommChild(subcomm); 7664 } 7665 /* flag to destroy *mat_n if not significative */ 7666 if (color) destroy_mat = PETSC_TRUE; 7667 } else { 7668 comm_n = comm; 7669 } 7670 7671 /* prepare send/receive buffers */ 7672 PetscCall(PetscMalloc1(size,&ilengths_idxs)); 7673 PetscCall(PetscArrayzero(ilengths_idxs,size)); 7674 PetscCall(PetscMalloc1(size,&ilengths_vals)); 7675 PetscCall(PetscArrayzero(ilengths_vals,size)); 7676 if (nis) { 7677 PetscCall(PetscCalloc1(size,&ilengths_idxs_is)); 7678 } 7679 7680 /* Get data from local matrices */ 7681 PetscCheck(isdense,PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7682 /* TODO: See below some guidelines on how to prepare the local buffers */ 7683 /* 7684 send_buffer_vals should contain the raw values of the local matrix 7685 send_buffer_idxs should contain: 7686 - MatType_PRIVATE type 7687 - PetscInt size_of_l2gmap 7688 - PetscInt global_row_indices[size_of_l2gmap] 7689 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7690 */ 7691 { 7692 ISLocalToGlobalMapping mapping; 7693 7694 PetscCall(MatISGetLocalToGlobalMapping(mat,&mapping,NULL)); 7695 PetscCall(MatDenseGetArrayRead(local_mat,&send_buffer_vals)); 7696 PetscCall(ISLocalToGlobalMappingGetSize(mapping,&i)); 7697 PetscCall(PetscMalloc1(i+2,&send_buffer_idxs)); 7698 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7699 send_buffer_idxs[1] = i; 7700 PetscCall(ISLocalToGlobalMappingGetIndices(mapping,(const PetscInt**)&ptr_idxs)); 7701 PetscCall(PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i)); 7702 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping,(const PetscInt**)&ptr_idxs)); 7703 PetscCall(PetscMPIIntCast(i,&len)); 7704 for (i=0;i<n_sends;i++) { 7705 ilengths_vals[is_indices[i]] = len*len; 7706 ilengths_idxs[is_indices[i]] = len+2; 7707 } 7708 } 7709 PetscCall(PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals)); 7710 /* additional is (if any) */ 7711 if (nis) { 7712 PetscMPIInt psum; 7713 PetscInt j; 7714 for (j=0,psum=0;j<nis;j++) { 7715 PetscInt plen; 7716 PetscCall(ISGetLocalSize(isarray[j],&plen)); 7717 PetscCall(PetscMPIIntCast(plen,&len)); 7718 psum += len+1; /* indices + lenght */ 7719 } 7720 PetscCall(PetscMalloc1(psum,&send_buffer_idxs_is)); 7721 for (j=0,psum=0;j<nis;j++) { 7722 PetscInt plen; 7723 const PetscInt *is_array_idxs; 7724 PetscCall(ISGetLocalSize(isarray[j],&plen)); 7725 send_buffer_idxs_is[psum] = plen; 7726 PetscCall(ISGetIndices(isarray[j],&is_array_idxs)); 7727 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen)); 7728 PetscCall(ISRestoreIndices(isarray[j],&is_array_idxs)); 7729 psum += plen+1; /* indices + lenght */ 7730 } 7731 for (i=0;i<n_sends;i++) { 7732 ilengths_idxs_is[is_indices[i]] = psum; 7733 } 7734 PetscCall(PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is)); 7735 } 7736 PetscCall(MatISRestoreLocalMat(mat,&local_mat)); 7737 7738 buf_size_idxs = 0; 7739 buf_size_vals = 0; 7740 buf_size_idxs_is = 0; 7741 buf_size_vecs = 0; 7742 for (i=0;i<n_recvs;i++) { 7743 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7744 buf_size_vals += (PetscInt)olengths_vals[i]; 7745 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7746 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7747 } 7748 PetscCall(PetscMalloc1(buf_size_idxs,&recv_buffer_idxs)); 7749 PetscCall(PetscMalloc1(buf_size_vals,&recv_buffer_vals)); 7750 PetscCall(PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is)); 7751 PetscCall(PetscMalloc1(buf_size_vecs,&recv_buffer_vecs)); 7752 7753 /* get new tags for clean communications */ 7754 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_idxs)); 7755 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_vals)); 7756 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is)); 7757 PetscCall(PetscObjectGetNewTag((PetscObject)mat,&tag_vecs)); 7758 7759 /* allocate for requests */ 7760 PetscCall(PetscMalloc1(n_sends,&send_req_idxs)); 7761 PetscCall(PetscMalloc1(n_sends,&send_req_vals)); 7762 PetscCall(PetscMalloc1(n_sends,&send_req_idxs_is)); 7763 PetscCall(PetscMalloc1(n_sends,&send_req_vecs)); 7764 PetscCall(PetscMalloc1(n_recvs,&recv_req_idxs)); 7765 PetscCall(PetscMalloc1(n_recvs,&recv_req_vals)); 7766 PetscCall(PetscMalloc1(n_recvs,&recv_req_idxs_is)); 7767 PetscCall(PetscMalloc1(n_recvs,&recv_req_vecs)); 7768 7769 /* communications */ 7770 ptr_idxs = recv_buffer_idxs; 7771 ptr_vals = recv_buffer_vals; 7772 ptr_idxs_is = recv_buffer_idxs_is; 7773 ptr_vecs = recv_buffer_vecs; 7774 for (i=0;i<n_recvs;i++) { 7775 source_dest = onodes[i]; 7776 PetscCallMPI(MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i])); 7777 PetscCallMPI(MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i])); 7778 ptr_idxs += olengths_idxs[i]; 7779 ptr_vals += olengths_vals[i]; 7780 if (nis) { 7781 source_dest = onodes_is[i]; 7782 PetscCallMPI(MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i])); 7783 ptr_idxs_is += olengths_idxs_is[i]; 7784 } 7785 if (nvecs) { 7786 source_dest = onodes[i]; 7787 PetscCallMPI(MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i])); 7788 ptr_vecs += olengths_idxs[i]-2; 7789 } 7790 } 7791 for (i=0;i<n_sends;i++) { 7792 PetscCall(PetscMPIIntCast(is_indices[i],&source_dest)); 7793 PetscCallMPI(MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i])); 7794 PetscCallMPI(MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i])); 7795 if (nis) { 7796 PetscCallMPI(MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i])); 7797 } 7798 if (nvecs) { 7799 PetscCall(VecGetArray(nnsp_vec[0],&send_buffer_vecs)); 7800 PetscCallMPI(MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i])); 7801 } 7802 } 7803 PetscCall(ISRestoreIndices(is_sends_internal,&is_indices)); 7804 PetscCall(ISDestroy(&is_sends_internal)); 7805 7806 /* assemble new l2g map */ 7807 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE)); 7808 ptr_idxs = recv_buffer_idxs; 7809 new_local_rows = 0; 7810 for (i=0;i<n_recvs;i++) { 7811 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7812 ptr_idxs += olengths_idxs[i]; 7813 } 7814 PetscCall(PetscMalloc1(new_local_rows,&l2gmap_indices)); 7815 ptr_idxs = recv_buffer_idxs; 7816 new_local_rows = 0; 7817 for (i=0;i<n_recvs;i++) { 7818 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1))); 7819 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7820 ptr_idxs += olengths_idxs[i]; 7821 } 7822 PetscCall(PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices)); 7823 PetscCall(ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap)); 7824 PetscCall(PetscFree(l2gmap_indices)); 7825 7826 /* infer new local matrix type from received local matrices type */ 7827 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7828 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7829 if (n_recvs) { 7830 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7831 ptr_idxs = recv_buffer_idxs; 7832 for (i=0;i<n_recvs;i++) { 7833 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7834 new_local_type_private = MATAIJ_PRIVATE; 7835 break; 7836 } 7837 ptr_idxs += olengths_idxs[i]; 7838 } 7839 switch (new_local_type_private) { 7840 case MATDENSE_PRIVATE: 7841 new_local_type = MATSEQAIJ; 7842 bs = 1; 7843 break; 7844 case MATAIJ_PRIVATE: 7845 new_local_type = MATSEQAIJ; 7846 bs = 1; 7847 break; 7848 case MATBAIJ_PRIVATE: 7849 new_local_type = MATSEQBAIJ; 7850 break; 7851 case MATSBAIJ_PRIVATE: 7852 new_local_type = MATSEQSBAIJ; 7853 break; 7854 default: 7855 SETERRQ(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7856 } 7857 } else { /* by default, new_local_type is seqaij */ 7858 new_local_type = MATSEQAIJ; 7859 bs = 1; 7860 } 7861 7862 /* create MATIS object if needed */ 7863 if (!reuse) { 7864 PetscCall(MatGetSize(mat,&rows,&cols)); 7865 PetscCall(MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n)); 7866 } else { 7867 /* it also destroys the local matrices */ 7868 if (*mat_n) { 7869 PetscCall(MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap)); 7870 } else { /* this is a fake object */ 7871 PetscCall(MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n)); 7872 } 7873 } 7874 PetscCall(MatISGetLocalMat(*mat_n,&local_mat)); 7875 PetscCall(MatSetType(local_mat,new_local_type)); 7876 7877 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE)); 7878 7879 /* Global to local map of received indices */ 7880 PetscCall(PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local)); /* needed for values insertion */ 7881 PetscCall(ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local)); 7882 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 7883 7884 /* restore attributes -> type of incoming data and its size */ 7885 buf_size_idxs = 0; 7886 for (i=0;i<n_recvs;i++) { 7887 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7888 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7889 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7890 } 7891 PetscCall(PetscFree(recv_buffer_idxs)); 7892 7893 /* set preallocation */ 7894 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense)); 7895 if (!newisdense) { 7896 PetscInt *new_local_nnz=NULL; 7897 7898 ptr_idxs = recv_buffer_idxs_local; 7899 if (n_recvs) { 7900 PetscCall(PetscCalloc1(new_local_rows,&new_local_nnz)); 7901 } 7902 for (i=0;i<n_recvs;i++) { 7903 PetscInt j; 7904 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7905 for (j=0;j<*(ptr_idxs+1);j++) { 7906 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7907 } 7908 } else { 7909 /* TODO */ 7910 } 7911 ptr_idxs += olengths_idxs[i]; 7912 } 7913 if (new_local_nnz) { 7914 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7915 PetscCall(MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz)); 7916 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7917 PetscCall(MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz)); 7918 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7919 PetscCall(MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz)); 7920 } else { 7921 PetscCall(MatSetUp(local_mat)); 7922 } 7923 PetscCall(PetscFree(new_local_nnz)); 7924 } else { 7925 PetscCall(MatSetUp(local_mat)); 7926 } 7927 7928 /* set values */ 7929 ptr_vals = recv_buffer_vals; 7930 ptr_idxs = recv_buffer_idxs_local; 7931 for (i=0;i<n_recvs;i++) { 7932 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7933 PetscCall(MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE)); 7934 PetscCall(MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES)); 7935 PetscCall(MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY)); 7936 PetscCall(MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY)); 7937 PetscCall(MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE)); 7938 } else { 7939 /* TODO */ 7940 } 7941 ptr_idxs += olengths_idxs[i]; 7942 ptr_vals += olengths_vals[i]; 7943 } 7944 PetscCall(MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY)); 7945 PetscCall(MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY)); 7946 PetscCall(MatISRestoreLocalMat(*mat_n,&local_mat)); 7947 PetscCall(MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY)); 7948 PetscCall(MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY)); 7949 PetscCall(PetscFree(recv_buffer_vals)); 7950 7951 #if 0 7952 if (!restrict_comm) { /* check */ 7953 Vec lvec,rvec; 7954 PetscReal infty_error; 7955 7956 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 7957 PetscCall(VecSetRandom(rvec,NULL)); 7958 PetscCall(MatMult(mat,rvec,lvec)); 7959 PetscCall(VecScale(lvec,-1.0)); 7960 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 7961 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 7962 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 7963 PetscCall(VecDestroy(&rvec)); 7964 PetscCall(VecDestroy(&lvec)); 7965 } 7966 #endif 7967 7968 /* assemble new additional is (if any) */ 7969 if (nis) { 7970 PetscInt **temp_idxs,*count_is,j,psum; 7971 7972 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE)); 7973 PetscCall(PetscCalloc1(nis,&count_is)); 7974 ptr_idxs = recv_buffer_idxs_is; 7975 psum = 0; 7976 for (i=0;i<n_recvs;i++) { 7977 for (j=0;j<nis;j++) { 7978 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7979 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7980 psum += plen; 7981 ptr_idxs += plen+1; /* shift pointer to received data */ 7982 } 7983 } 7984 PetscCall(PetscMalloc1(nis,&temp_idxs)); 7985 PetscCall(PetscMalloc1(psum,&temp_idxs[0])); 7986 for (i=1;i<nis;i++) { 7987 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7988 } 7989 PetscCall(PetscArrayzero(count_is,nis)); 7990 ptr_idxs = recv_buffer_idxs_is; 7991 for (i=0;i<n_recvs;i++) { 7992 for (j=0;j<nis;j++) { 7993 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7994 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen)); 7995 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7996 ptr_idxs += plen+1; /* shift pointer to received data */ 7997 } 7998 } 7999 for (i=0;i<nis;i++) { 8000 PetscCall(ISDestroy(&isarray[i])); 8001 PetscCall(PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i])); 8002 PetscCall(ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i])); 8003 } 8004 PetscCall(PetscFree(count_is)); 8005 PetscCall(PetscFree(temp_idxs[0])); 8006 PetscCall(PetscFree(temp_idxs)); 8007 } 8008 /* free workspace */ 8009 PetscCall(PetscFree(recv_buffer_idxs_is)); 8010 PetscCallMPI(MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE)); 8011 PetscCall(PetscFree(send_buffer_idxs)); 8012 PetscCallMPI(MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE)); 8013 if (isdense) { 8014 PetscCall(MatISGetLocalMat(mat,&local_mat)); 8015 PetscCall(MatDenseRestoreArrayRead(local_mat,&send_buffer_vals)); 8016 PetscCall(MatISRestoreLocalMat(mat,&local_mat)); 8017 } else { 8018 /* PetscCall(PetscFree(send_buffer_vals)); */ 8019 } 8020 if (nis) { 8021 PetscCallMPI(MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE)); 8022 PetscCall(PetscFree(send_buffer_idxs_is)); 8023 } 8024 8025 if (nvecs) { 8026 PetscCallMPI(MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE)); 8027 PetscCallMPI(MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE)); 8028 PetscCall(VecRestoreArray(nnsp_vec[0],&send_buffer_vecs)); 8029 PetscCall(VecDestroy(&nnsp_vec[0])); 8030 PetscCall(VecCreate(comm_n,&nnsp_vec[0])); 8031 PetscCall(VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE)); 8032 PetscCall(VecSetType(nnsp_vec[0],VECSTANDARD)); 8033 /* set values */ 8034 ptr_vals = recv_buffer_vecs; 8035 ptr_idxs = recv_buffer_idxs_local; 8036 PetscCall(VecGetArray(nnsp_vec[0],&send_buffer_vecs)); 8037 for (i=0;i<n_recvs;i++) { 8038 PetscInt j; 8039 for (j=0;j<*(ptr_idxs+1);j++) { 8040 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 8041 } 8042 ptr_idxs += olengths_idxs[i]; 8043 ptr_vals += olengths_idxs[i]-2; 8044 } 8045 PetscCall(VecRestoreArray(nnsp_vec[0],&send_buffer_vecs)); 8046 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 8047 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 8048 } 8049 8050 PetscCall(PetscFree(recv_buffer_vecs)); 8051 PetscCall(PetscFree(recv_buffer_idxs_local)); 8052 PetscCall(PetscFree(recv_req_idxs)); 8053 PetscCall(PetscFree(recv_req_vals)); 8054 PetscCall(PetscFree(recv_req_vecs)); 8055 PetscCall(PetscFree(recv_req_idxs_is)); 8056 PetscCall(PetscFree(send_req_idxs)); 8057 PetscCall(PetscFree(send_req_vals)); 8058 PetscCall(PetscFree(send_req_vecs)); 8059 PetscCall(PetscFree(send_req_idxs_is)); 8060 PetscCall(PetscFree(ilengths_vals)); 8061 PetscCall(PetscFree(ilengths_idxs)); 8062 PetscCall(PetscFree(olengths_vals)); 8063 PetscCall(PetscFree(olengths_idxs)); 8064 PetscCall(PetscFree(onodes)); 8065 if (nis) { 8066 PetscCall(PetscFree(ilengths_idxs_is)); 8067 PetscCall(PetscFree(olengths_idxs_is)); 8068 PetscCall(PetscFree(onodes_is)); 8069 } 8070 PetscCall(PetscSubcommDestroy(&subcomm)); 8071 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 8072 PetscCall(MatDestroy(mat_n)); 8073 for (i=0;i<nis;i++) { 8074 PetscCall(ISDestroy(&isarray[i])); 8075 } 8076 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8077 PetscCall(VecDestroy(&nnsp_vec[0])); 8078 } 8079 *mat_n = NULL; 8080 } 8081 PetscFunctionReturn(0); 8082 } 8083 8084 /* temporary hack into ksp private data structure */ 8085 #include <petsc/private/kspimpl.h> 8086 8087 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 8088 { 8089 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 8090 PC_IS *pcis = (PC_IS*)pc->data; 8091 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 8092 Mat coarsedivudotp = NULL; 8093 Mat coarseG,t_coarse_mat_is; 8094 MatNullSpace CoarseNullSpace = NULL; 8095 ISLocalToGlobalMapping coarse_islg; 8096 IS coarse_is,*isarray,corners; 8097 PetscInt i,im_active=-1,active_procs=-1; 8098 PetscInt nis,nisdofs,nisneu,nisvert; 8099 PetscInt coarse_eqs_per_proc; 8100 PC pc_temp; 8101 PCType coarse_pc_type; 8102 KSPType coarse_ksp_type; 8103 PetscBool multilevel_requested,multilevel_allowed; 8104 PetscBool coarse_reuse; 8105 PetscInt ncoarse,nedcfield; 8106 PetscBool compute_vecs = PETSC_FALSE; 8107 PetscScalar *array; 8108 MatReuse coarse_mat_reuse; 8109 PetscBool restr, full_restr, have_void; 8110 PetscMPIInt size; 8111 PetscErrorCode ierr; 8112 8113 PetscFunctionBegin; 8114 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0)); 8115 /* Assign global numbering to coarse dofs */ 8116 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8117 PetscInt ocoarse_size; 8118 compute_vecs = PETSC_TRUE; 8119 8120 pcbddc->new_primal_space = PETSC_TRUE; 8121 ocoarse_size = pcbddc->coarse_size; 8122 PetscCall(PetscFree(pcbddc->global_primal_indices)); 8123 PetscCall(PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices)); 8124 /* see if we can avoid some work */ 8125 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8126 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8127 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8128 PetscCall(KSPReset(pcbddc->coarse_ksp)); 8129 coarse_reuse = PETSC_FALSE; 8130 } else { /* we can safely reuse already computed coarse matrix */ 8131 coarse_reuse = PETSC_TRUE; 8132 } 8133 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8134 coarse_reuse = PETSC_FALSE; 8135 } 8136 /* reset any subassembling information */ 8137 if (!coarse_reuse || pcbddc->recompute_topography) { 8138 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 8139 } 8140 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8141 coarse_reuse = PETSC_TRUE; 8142 } 8143 if (coarse_reuse && pcbddc->coarse_ksp) { 8144 PetscCall(KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL)); 8145 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 8146 coarse_mat_reuse = MAT_REUSE_MATRIX; 8147 } else { 8148 coarse_mat = NULL; 8149 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8150 } 8151 8152 /* creates temporary l2gmap and IS for coarse indexes */ 8153 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is)); 8154 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg)); 8155 8156 /* creates temporary MATIS object for coarse matrix */ 8157 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense)); 8158 PetscCall(MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,coarse_islg,&t_coarse_mat_is)); 8159 PetscCall(MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense)); 8160 PetscCall(MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY)); 8161 PetscCall(MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY)); 8162 PetscCall(MatDestroy(&coarse_submat_dense)); 8163 8164 /* count "active" (i.e. with positive local size) and "void" processes */ 8165 im_active = !!(pcis->n); 8166 PetscCallMPI(MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 8167 8168 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8169 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8170 /* full_restr : just use the receivers from the subassembling pattern */ 8171 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size)); 8172 coarse_mat_is = NULL; 8173 multilevel_allowed = PETSC_FALSE; 8174 multilevel_requested = PETSC_FALSE; 8175 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 8176 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 8177 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8178 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8179 if (multilevel_requested) { 8180 ncoarse = active_procs/pcbddc->coarsening_ratio; 8181 restr = PETSC_FALSE; 8182 full_restr = PETSC_FALSE; 8183 } else { 8184 ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc); 8185 restr = PETSC_TRUE; 8186 full_restr = PETSC_TRUE; 8187 } 8188 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8189 ncoarse = PetscMax(1,ncoarse); 8190 if (!pcbddc->coarse_subassembling) { 8191 if (pcbddc->coarsening_ratio > 1) { 8192 if (multilevel_requested) { 8193 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void)); 8194 } else { 8195 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void)); 8196 } 8197 } else { 8198 PetscMPIInt rank; 8199 8200 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank)); 8201 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 8202 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling)); 8203 } 8204 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8205 PetscInt psum; 8206 if (pcbddc->coarse_ksp) psum = 1; 8207 else psum = 0; 8208 PetscCallMPI(MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc))); 8209 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8210 } 8211 /* determine if we can go multilevel */ 8212 if (multilevel_requested) { 8213 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8214 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8215 } 8216 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8217 8218 /* dump subassembling pattern */ 8219 if (pcbddc->dbg_flag && multilevel_allowed) { 8220 PetscCall(ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer)); 8221 } 8222 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8223 nedcfield = -1; 8224 corners = NULL; 8225 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8226 PetscInt *tidxs,*tidxs2,nout,tsize,i; 8227 const PetscInt *idxs; 8228 ISLocalToGlobalMapping tmap; 8229 8230 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8231 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap)); 8232 /* allocate space for temporary storage */ 8233 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&tidxs)); 8234 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&tidxs2)); 8235 /* allocate for IS array */ 8236 nisdofs = pcbddc->n_ISForDofsLocal; 8237 if (pcbddc->nedclocal) { 8238 if (pcbddc->nedfield > -1) { 8239 nedcfield = pcbddc->nedfield; 8240 } else { 8241 nedcfield = 0; 8242 PetscCheck(!nisdofs,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs); 8243 nisdofs = 1; 8244 } 8245 } 8246 nisneu = !!pcbddc->NeumannBoundariesLocal; 8247 nisvert = 0; /* nisvert is not used */ 8248 nis = nisdofs + nisneu + nisvert; 8249 PetscCall(PetscMalloc1(nis,&isarray)); 8250 /* dofs splitting */ 8251 for (i=0;i<nisdofs;i++) { 8252 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 8253 if (nedcfield != i) { 8254 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize)); 8255 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs)); 8256 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8257 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs)); 8258 } else { 8259 PetscCall(ISGetLocalSize(pcbddc->nedclocal,&tsize)); 8260 PetscCall(ISGetIndices(pcbddc->nedclocal,&idxs)); 8261 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8262 PetscCheckFalse(tsize != nout,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout); 8263 PetscCall(ISRestoreIndices(pcbddc->nedclocal,&idxs)); 8264 } 8265 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8266 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i])); 8267 /* PetscCall(ISView(isarray[i],0)); */ 8268 } 8269 /* neumann boundaries */ 8270 if (pcbddc->NeumannBoundariesLocal) { 8271 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 8272 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize)); 8273 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 8274 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8275 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs)); 8276 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8277 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs])); 8278 /* PetscCall(ISView(isarray[nisdofs],0)); */ 8279 } 8280 /* coordinates */ 8281 if (pcbddc->corner_selected) { 8282 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners)); 8283 PetscCall(ISGetLocalSize(corners,&tsize)); 8284 PetscCall(ISGetIndices(corners,&idxs)); 8285 PetscCall(ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs)); 8286 PetscCheckFalse(tsize != nout,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %D != %D",tsize,nout); 8287 PetscCall(ISRestoreIndices(corners,&idxs)); 8288 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners)); 8289 PetscCall(ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2)); 8290 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners)); 8291 } 8292 PetscCall(PetscFree(tidxs)); 8293 PetscCall(PetscFree(tidxs2)); 8294 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 8295 } else { 8296 nis = 0; 8297 nisdofs = 0; 8298 nisneu = 0; 8299 nisvert = 0; 8300 isarray = NULL; 8301 } 8302 /* destroy no longer needed map */ 8303 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 8304 8305 /* subassemble */ 8306 if (multilevel_allowed) { 8307 Vec vp[1]; 8308 PetscInt nvecs = 0; 8309 PetscBool reuse,reuser; 8310 8311 if (coarse_mat) reuse = PETSC_TRUE; 8312 else reuse = PETSC_FALSE; 8313 PetscCallMPI(MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 8314 vp[0] = NULL; 8315 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8316 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc),&vp[0])); 8317 PetscCall(VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE)); 8318 PetscCall(VecSetType(vp[0],VECSTANDARD)); 8319 nvecs = 1; 8320 8321 if (pcbddc->divudotp) { 8322 Mat B,loc_divudotp; 8323 Vec v,p; 8324 IS dummy; 8325 PetscInt np; 8326 8327 PetscCall(MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp)); 8328 PetscCall(MatGetSize(loc_divudotp,&np,NULL)); 8329 PetscCall(ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy)); 8330 PetscCall(MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B)); 8331 PetscCall(MatCreateVecs(B,&v,&p)); 8332 PetscCall(VecSet(p,1.)); 8333 PetscCall(MatMultTranspose(B,p,v)); 8334 PetscCall(VecDestroy(&p)); 8335 PetscCall(MatDestroy(&B)); 8336 PetscCall(VecGetArray(vp[0],&array)); 8337 PetscCall(VecPlaceArray(pcbddc->vec1_P,array)); 8338 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P)); 8339 PetscCall(VecResetArray(pcbddc->vec1_P)); 8340 PetscCall(VecRestoreArray(vp[0],&array)); 8341 PetscCall(ISDestroy(&dummy)); 8342 PetscCall(VecDestroy(&v)); 8343 } 8344 } 8345 if (reuser) { 8346 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp)); 8347 } else { 8348 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp)); 8349 } 8350 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8351 PetscScalar *arraym; 8352 const PetscScalar *arrayv; 8353 PetscInt nl; 8354 PetscCall(VecGetLocalSize(vp[0],&nl)); 8355 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp)); 8356 PetscCall(MatDenseGetArray(coarsedivudotp,&arraym)); 8357 PetscCall(VecGetArrayRead(vp[0],&arrayv)); 8358 PetscCall(PetscArraycpy(arraym,arrayv,nl)); 8359 PetscCall(VecRestoreArrayRead(vp[0],&arrayv)); 8360 PetscCall(MatDenseRestoreArray(coarsedivudotp,&arraym)); 8361 PetscCall(VecDestroy(&vp[0])); 8362 } else { 8363 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp)); 8364 } 8365 } else { 8366 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL)); 8367 } 8368 if (coarse_mat_is || coarse_mat) { 8369 if (!multilevel_allowed) { 8370 PetscCall(MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat)); 8371 } else { 8372 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8373 if (coarse_mat_is) { 8374 PetscCheck(!coarse_mat,PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8375 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8376 coarse_mat = coarse_mat_is; 8377 } 8378 } 8379 } 8380 PetscCall(MatDestroy(&t_coarse_mat_is)); 8381 PetscCall(MatDestroy(&coarse_mat_is)); 8382 8383 /* create local to global scatters for coarse problem */ 8384 if (compute_vecs) { 8385 PetscInt lrows; 8386 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8387 if (coarse_mat) { 8388 PetscCall(MatGetLocalSize(coarse_mat,&lrows,NULL)); 8389 } else { 8390 lrows = 0; 8391 } 8392 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec)); 8393 PetscCall(VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE)); 8394 PetscCall(VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8395 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8396 PetscCall(VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob)); 8397 } 8398 PetscCall(ISDestroy(&coarse_is)); 8399 8400 /* set defaults for coarse KSP and PC */ 8401 if (multilevel_allowed) { 8402 coarse_ksp_type = KSPRICHARDSON; 8403 coarse_pc_type = PCBDDC; 8404 } else { 8405 coarse_ksp_type = KSPPREONLY; 8406 coarse_pc_type = PCREDUNDANT; 8407 } 8408 8409 /* print some info if requested */ 8410 if (pcbddc->dbg_flag) { 8411 if (!multilevel_allowed) { 8412 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 8413 if (multilevel_requested) { 8414 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio)); 8415 } else if (pcbddc->max_levels) { 8416 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels)); 8417 } 8418 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8419 } 8420 } 8421 8422 /* communicate coarse discrete gradient */ 8423 coarseG = NULL; 8424 if (pcbddc->nedcG && multilevel_allowed) { 8425 MPI_Comm ccomm; 8426 if (coarse_mat) { 8427 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8428 } else { 8429 ccomm = MPI_COMM_NULL; 8430 } 8431 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG)); 8432 } 8433 8434 /* create the coarse KSP object only once with defaults */ 8435 if (coarse_mat) { 8436 PetscBool isredundant,isbddc,force,valid; 8437 PetscViewer dbg_viewer = NULL; 8438 8439 if (pcbddc->dbg_flag) { 8440 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8441 PetscCall(PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level)); 8442 } 8443 if (!pcbddc->coarse_ksp) { 8444 char prefix[256],str_level[16]; 8445 size_t len; 8446 8447 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp)); 8448 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure)); 8449 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1)); 8450 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1)); 8451 PetscCall(KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat)); 8452 PetscCall(KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type)); 8453 PetscCall(KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE)); 8454 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8455 /* TODO is this logic correct? should check for coarse_mat type */ 8456 PetscCall(PCSetType(pc_temp,coarse_pc_type)); 8457 /* prefix */ 8458 PetscCall(PetscStrcpy(prefix,"")); 8459 PetscCall(PetscStrcpy(str_level,"")); 8460 if (!pcbddc->current_level) { 8461 PetscCall(PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix))); 8462 PetscCall(PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix))); 8463 } else { 8464 PetscCall(PetscStrlen(((PetscObject)pc)->prefix,&len)); 8465 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8466 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8467 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8468 PetscCall(PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1)); 8469 PetscCall(PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level))); 8470 PetscCall(PetscStrlcat(prefix,str_level,sizeof(prefix))); 8471 } 8472 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix)); 8473 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8474 PetscCall(PCBDDCSetLevel(pc_temp,pcbddc->current_level+1)); 8475 PetscCall(PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio)); 8476 PetscCall(PCBDDCSetLevels(pc_temp,pcbddc->max_levels)); 8477 /* allow user customization */ 8478 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8479 /* get some info after set from options */ 8480 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8481 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8482 force = PETSC_FALSE; 8483 PetscCall(PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL)); 8484 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"")); 8485 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8486 if (multilevel_allowed && !force && !valid) { 8487 isbddc = PETSC_TRUE; 8488 PetscCall(PCSetType(pc_temp,PCBDDC)); 8489 PetscCall(PCBDDCSetLevel(pc_temp,pcbddc->current_level+1)); 8490 PetscCall(PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio)); 8491 PetscCall(PCBDDCSetLevels(pc_temp,pcbddc->max_levels)); 8492 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8493 ierr = PetscObjectOptionsBegin((PetscObject)pc_temp);PetscCall(ierr); 8494 PetscCall((*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp)); 8495 PetscCall(PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp)); 8496 ierr = PetscOptionsEnd();PetscCall(ierr); 8497 pc_temp->setfromoptionscalled++; 8498 } 8499 } 8500 } 8501 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8502 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&pc_temp)); 8503 if (nisdofs) { 8504 PetscCall(PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray)); 8505 for (i=0;i<nisdofs;i++) { 8506 PetscCall(ISDestroy(&isarray[i])); 8507 } 8508 } 8509 if (nisneu) { 8510 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs])); 8511 PetscCall(ISDestroy(&isarray[nisdofs])); 8512 } 8513 if (nisvert) { 8514 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1])); 8515 PetscCall(ISDestroy(&isarray[nis-1])); 8516 } 8517 if (coarseG) { 8518 PetscCall(PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE)); 8519 } 8520 8521 /* get some info after set from options */ 8522 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8523 8524 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8525 if (isbddc && !multilevel_allowed) { 8526 PetscCall(PCSetType(pc_temp,coarse_pc_type)); 8527 } 8528 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8529 force = PETSC_FALSE; 8530 PetscCall(PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL)); 8531 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"")); 8532 if (multilevel_requested && multilevel_allowed && !valid && !force) { 8533 PetscCall(PCSetType(pc_temp,PCBDDC)); 8534 } 8535 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant)); 8536 if (isredundant) { 8537 KSP inner_ksp; 8538 PC inner_pc; 8539 8540 PetscCall(PCRedundantGetKSP(pc_temp,&inner_ksp)); 8541 PetscCall(KSPGetPC(inner_ksp,&inner_pc)); 8542 } 8543 8544 /* parameters which miss an API */ 8545 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc)); 8546 if (isbddc) { 8547 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8548 8549 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8550 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8551 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8552 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8553 if (pcbddc_coarse->benign_saddle_point) { 8554 Mat coarsedivudotp_is; 8555 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8556 IS row,col; 8557 const PetscInt *gidxs; 8558 PetscInt n,st,M,N; 8559 8560 PetscCall(MatGetSize(coarsedivudotp,&n,NULL)); 8561 PetscCallMPI(MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat))); 8562 st = st-n; 8563 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row)); 8564 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL)); 8565 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap,&n)); 8566 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs)); 8567 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col)); 8568 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs)); 8569 PetscCall(ISLocalToGlobalMappingCreateIS(row,&rl2g)); 8570 PetscCall(ISLocalToGlobalMappingCreateIS(col,&cl2g)); 8571 PetscCall(ISGetSize(row,&M)); 8572 PetscCall(MatGetSize(coarse_mat,&N,NULL)); 8573 PetscCall(ISDestroy(&row)); 8574 PetscCall(ISDestroy(&col)); 8575 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is)); 8576 PetscCall(MatSetType(coarsedivudotp_is,MATIS)); 8577 PetscCall(MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N)); 8578 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g)); 8579 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 8580 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 8581 PetscCall(MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp)); 8582 PetscCall(MatDestroy(&coarsedivudotp)); 8583 PetscCall(PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL)); 8584 PetscCall(MatDestroy(&coarsedivudotp_is)); 8585 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8586 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8587 } 8588 } 8589 8590 /* propagate symmetry info of coarse matrix */ 8591 PetscCall(MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE)); 8592 if (pc->pmat->symmetric_set) { 8593 PetscCall(MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric)); 8594 } 8595 if (pc->pmat->hermitian_set) { 8596 PetscCall(MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian)); 8597 } 8598 if (pc->pmat->spd_set) { 8599 PetscCall(MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd)); 8600 } 8601 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8602 PetscCall(MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE)); 8603 } 8604 /* set operators */ 8605 PetscCall(MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view")); 8606 PetscCall(MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix)); 8607 PetscCall(KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat)); 8608 if (pcbddc->dbg_flag) { 8609 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level)); 8610 } 8611 } 8612 PetscCall(MatDestroy(&coarseG)); 8613 PetscCall(PetscFree(isarray)); 8614 #if 0 8615 { 8616 PetscViewer viewer; 8617 char filename[256]; 8618 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8619 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 8620 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 8621 PetscCall(MatView(coarse_mat,viewer)); 8622 PetscCall(PetscViewerPopFormat(viewer)); 8623 PetscCall(PetscViewerDestroy(&viewer)); 8624 } 8625 #endif 8626 8627 if (corners) { 8628 Vec gv; 8629 IS is; 8630 const PetscInt *idxs; 8631 PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim; 8632 PetscScalar *coords; 8633 8634 PetscCheck(pcbddc->mat_graph->cloc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates"); 8635 PetscCall(VecGetSize(pcbddc->coarse_vec,&N)); 8636 PetscCall(VecGetLocalSize(pcbddc->coarse_vec,&n)); 8637 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv)); 8638 PetscCall(VecSetBlockSize(gv,cdim)); 8639 PetscCall(VecSetSizes(gv,n*cdim,N*cdim)); 8640 PetscCall(VecSetType(gv,VECSTANDARD)); 8641 PetscCall(VecSetFromOptions(gv)); 8642 PetscCall(VecSet(gv,PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 8643 8644 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is)); 8645 PetscCall(ISGetLocalSize(is,&n)); 8646 PetscCall(ISGetIndices(is,&idxs)); 8647 PetscCall(PetscMalloc1(n*cdim,&coords)); 8648 for (i=0;i<n;i++) { 8649 for (d=0;d<cdim;d++) { 8650 coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d]; 8651 } 8652 } 8653 PetscCall(ISRestoreIndices(is,&idxs)); 8654 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is)); 8655 8656 PetscCall(ISGetLocalSize(corners,&n)); 8657 PetscCall(ISGetIndices(corners,&idxs)); 8658 PetscCall(VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES)); 8659 PetscCall(ISRestoreIndices(corners,&idxs)); 8660 PetscCall(PetscFree(coords)); 8661 PetscCall(VecAssemblyBegin(gv)); 8662 PetscCall(VecAssemblyEnd(gv)); 8663 PetscCall(VecGetArray(gv,&coords)); 8664 if (pcbddc->coarse_ksp) { 8665 PC coarse_pc; 8666 PetscBool isbddc; 8667 8668 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&coarse_pc)); 8669 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc)); 8670 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8671 PetscReal *realcoords; 8672 8673 PetscCall(VecGetLocalSize(gv,&n)); 8674 #if defined(PETSC_USE_COMPLEX) 8675 PetscCall(PetscMalloc1(n,&realcoords)); 8676 for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]); 8677 #else 8678 realcoords = coords; 8679 #endif 8680 PetscCall(PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords)); 8681 #if defined(PETSC_USE_COMPLEX) 8682 PetscCall(PetscFree(realcoords)); 8683 #endif 8684 } 8685 } 8686 PetscCall(VecRestoreArray(gv,&coords)); 8687 PetscCall(VecDestroy(&gv)); 8688 } 8689 PetscCall(ISDestroy(&corners)); 8690 8691 if (pcbddc->coarse_ksp) { 8692 Vec crhs,csol; 8693 8694 PetscCall(KSPGetSolution(pcbddc->coarse_ksp,&csol)); 8695 PetscCall(KSPGetRhs(pcbddc->coarse_ksp,&crhs)); 8696 if (!csol) { 8697 PetscCall(MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL)); 8698 } 8699 if (!crhs) { 8700 PetscCall(MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs))); 8701 } 8702 } 8703 PetscCall(MatDestroy(&coarsedivudotp)); 8704 8705 /* compute null space for coarse solver if the benign trick has been requested */ 8706 if (pcbddc->benign_null) { 8707 8708 PetscCall(VecSet(pcbddc->vec1_P,0.)); 8709 for (i=0;i<pcbddc->benign_n;i++) { 8710 PetscCall(VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES)); 8711 } 8712 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 8713 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 8714 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD)); 8715 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD)); 8716 if (coarse_mat) { 8717 Vec nullv; 8718 PetscScalar *array,*array2; 8719 PetscInt nl; 8720 8721 PetscCall(MatCreateVecs(coarse_mat,&nullv,NULL)); 8722 PetscCall(VecGetLocalSize(nullv,&nl)); 8723 PetscCall(VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array)); 8724 PetscCall(VecGetArray(nullv,&array2)); 8725 PetscCall(PetscArraycpy(array2,array,nl)); 8726 PetscCall(VecRestoreArray(nullv,&array2)); 8727 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array)); 8728 PetscCall(VecNormalize(nullv,NULL)); 8729 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace)); 8730 PetscCall(VecDestroy(&nullv)); 8731 } 8732 } 8733 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0)); 8734 8735 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0)); 8736 if (pcbddc->coarse_ksp) { 8737 PetscBool ispreonly; 8738 8739 if (CoarseNullSpace) { 8740 PetscBool isnull; 8741 PetscCall(MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull)); 8742 if (isnull) { 8743 PetscCall(MatSetNullSpace(coarse_mat,CoarseNullSpace)); 8744 } 8745 /* TODO: add local nullspaces (if any) */ 8746 } 8747 /* setup coarse ksp */ 8748 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 8749 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8750 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly)); 8751 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 8752 KSP check_ksp; 8753 KSPType check_ksp_type; 8754 PC check_pc; 8755 Vec check_vec,coarse_vec; 8756 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8757 PetscInt its; 8758 PetscBool compute_eigs; 8759 PetscReal *eigs_r,*eigs_c; 8760 PetscInt neigs; 8761 const char *prefix; 8762 8763 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8764 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp)); 8765 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0)); 8766 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE)); 8767 PetscCall(KSPSetOperators(check_ksp,coarse_mat,coarse_mat)); 8768 PetscCall(KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size)); 8769 /* prevent from setup unneeded object */ 8770 PetscCall(KSPGetPC(check_ksp,&check_pc)); 8771 PetscCall(PCSetType(check_pc,PCNONE)); 8772 if (ispreonly) { 8773 check_ksp_type = KSPPREONLY; 8774 compute_eigs = PETSC_FALSE; 8775 } else { 8776 check_ksp_type = KSPGMRES; 8777 compute_eigs = PETSC_TRUE; 8778 } 8779 PetscCall(KSPSetType(check_ksp,check_ksp_type)); 8780 PetscCall(KSPSetComputeSingularValues(check_ksp,compute_eigs)); 8781 PetscCall(KSPSetComputeEigenvalues(check_ksp,compute_eigs)); 8782 PetscCall(KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1)); 8783 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix)); 8784 PetscCall(KSPSetOptionsPrefix(check_ksp,prefix)); 8785 PetscCall(KSPAppendOptionsPrefix(check_ksp,"check_")); 8786 PetscCall(KSPSetFromOptions(check_ksp)); 8787 PetscCall(KSPSetUp(check_ksp)); 8788 PetscCall(KSPGetPC(pcbddc->coarse_ksp,&check_pc)); 8789 PetscCall(KSPSetPC(check_ksp,check_pc)); 8790 /* create random vec */ 8791 PetscCall(MatCreateVecs(coarse_mat,&coarse_vec,&check_vec)); 8792 PetscCall(VecSetRandom(check_vec,NULL)); 8793 PetscCall(MatMult(coarse_mat,check_vec,coarse_vec)); 8794 /* solve coarse problem */ 8795 PetscCall(KSPSolve(check_ksp,coarse_vec,coarse_vec)); 8796 PetscCall(KSPCheckSolve(check_ksp,pc,coarse_vec)); 8797 /* set eigenvalue estimation if preonly has not been requested */ 8798 if (compute_eigs) { 8799 PetscCall(PetscMalloc1(pcbddc->coarse_size+1,&eigs_r)); 8800 PetscCall(PetscMalloc1(pcbddc->coarse_size+1,&eigs_c)); 8801 PetscCall(KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs)); 8802 if (neigs) { 8803 lambda_max = eigs_r[neigs-1]; 8804 lambda_min = eigs_r[0]; 8805 if (pcbddc->use_coarse_estimates) { 8806 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8807 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min)); 8808 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min))); 8809 } 8810 } 8811 } 8812 } 8813 8814 /* check coarse problem residual error */ 8815 if (pcbddc->dbg_flag) { 8816 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8817 PetscCall(PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1))); 8818 PetscCall(VecAXPY(check_vec,-1.0,coarse_vec)); 8819 PetscCall(VecNorm(check_vec,NORM_INFINITY,&infty_error)); 8820 PetscCall(MatMult(coarse_mat,check_vec,coarse_vec)); 8821 PetscCall(VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error)); 8822 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates)); 8823 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer)); 8824 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer)); 8825 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error)); 8826 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error)); 8827 if (CoarseNullSpace) { 8828 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n")); 8829 } 8830 if (compute_eigs) { 8831 PetscReal lambda_max_s,lambda_min_s; 8832 KSPConvergedReason reason; 8833 PetscCall(KSPGetType(check_ksp,&check_ksp_type)); 8834 PetscCall(KSPGetIterationNumber(check_ksp,&its)); 8835 PetscCall(KSPGetConvergedReason(check_ksp,&reason)); 8836 PetscCall(KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s)); 8837 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s)); 8838 for (i=0;i<neigs;i++) { 8839 PetscCall(PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i])); 8840 } 8841 } 8842 PetscCall(PetscViewerFlush(dbg_viewer)); 8843 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1))); 8844 } 8845 PetscCall(VecDestroy(&check_vec)); 8846 PetscCall(VecDestroy(&coarse_vec)); 8847 PetscCall(KSPDestroy(&check_ksp)); 8848 if (compute_eigs) { 8849 PetscCall(PetscFree(eigs_r)); 8850 PetscCall(PetscFree(eigs_c)); 8851 } 8852 } 8853 } 8854 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 8855 /* print additional info */ 8856 if (pcbddc->dbg_flag) { 8857 /* waits until all processes reaches this point */ 8858 PetscCall(PetscBarrier((PetscObject)pc)); 8859 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level)); 8860 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8861 } 8862 8863 /* free memory */ 8864 PetscCall(MatDestroy(&coarse_mat)); 8865 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0)); 8866 PetscFunctionReturn(0); 8867 } 8868 8869 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8870 { 8871 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8872 PC_IS* pcis = (PC_IS*)pc->data; 8873 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8874 IS subset,subset_mult,subset_n; 8875 PetscInt local_size,coarse_size=0; 8876 PetscInt *local_primal_indices=NULL; 8877 const PetscInt *t_local_primal_indices; 8878 8879 PetscFunctionBegin; 8880 /* Compute global number of coarse dofs */ 8881 PetscCheckFalse(pcbddc->local_primal_size && !pcbddc->local_primal_ref_node,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8882 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n)); 8883 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset)); 8884 PetscCall(ISDestroy(&subset_n)); 8885 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult)); 8886 PetscCall(ISRenumber(subset,subset_mult,&coarse_size,&subset_n)); 8887 PetscCall(ISDestroy(&subset)); 8888 PetscCall(ISDestroy(&subset_mult)); 8889 PetscCall(ISGetLocalSize(subset_n,&local_size)); 8890 PetscCheckFalse(local_size != pcbddc->local_primal_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8891 PetscCall(PetscMalloc1(local_size,&local_primal_indices)); 8892 PetscCall(ISGetIndices(subset_n,&t_local_primal_indices)); 8893 PetscCall(PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size)); 8894 PetscCall(ISRestoreIndices(subset_n,&t_local_primal_indices)); 8895 PetscCall(ISDestroy(&subset_n)); 8896 8897 /* check numbering */ 8898 if (pcbddc->dbg_flag) { 8899 PetscScalar coarsesum,*array,*array2; 8900 PetscInt i; 8901 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8902 8903 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8904 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n")); 8905 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n")); 8906 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8907 /* counter */ 8908 PetscCall(VecSet(pcis->vec1_global,0.0)); 8909 PetscCall(VecSet(pcis->vec1_N,1.0)); 8910 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8911 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8912 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD)); 8913 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD)); 8914 PetscCall(VecSet(pcis->vec1_N,0.0)); 8915 for (i=0;i<pcbddc->local_primal_size;i++) { 8916 PetscCall(VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES)); 8917 } 8918 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 8919 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 8920 PetscCall(VecSet(pcis->vec1_global,0.0)); 8921 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8922 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8923 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 8924 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD)); 8925 PetscCall(VecGetArray(pcis->vec1_N,&array)); 8926 PetscCall(VecGetArray(pcis->vec2_N,&array2)); 8927 for (i=0;i<pcis->n;i++) { 8928 if (array[i] != 0.0 && array[i] != array2[i]) { 8929 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8930 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8931 set_error = PETSC_TRUE; 8932 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi)); 8933 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh)); 8934 } 8935 } 8936 PetscCall(VecRestoreArray(pcis->vec2_N,&array2)); 8937 PetscCallMPI(MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 8938 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8939 for (i=0;i<pcis->n;i++) { 8940 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8941 } 8942 PetscCall(VecRestoreArray(pcis->vec1_N,&array)); 8943 PetscCall(VecSet(pcis->vec1_global,0.0)); 8944 PetscCall(VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8945 PetscCall(VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE)); 8946 PetscCall(VecSum(pcis->vec1_global,&coarsesum)); 8947 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum))); 8948 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8949 PetscInt *gidxs; 8950 8951 PetscCall(PetscMalloc1(pcbddc->local_primal_size,&gidxs)); 8952 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs)); 8953 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n")); 8954 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8955 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank)); 8956 for (i=0;i<pcbddc->local_primal_size;i++) { 8957 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i])); 8958 } 8959 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8960 PetscCall(PetscFree(gidxs)); 8961 } 8962 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8963 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8964 PetscCheck(!set_error_reduced,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8965 } 8966 8967 /* get back data */ 8968 *coarse_size_n = coarse_size; 8969 *local_primal_indices_n = local_primal_indices; 8970 PetscFunctionReturn(0); 8971 } 8972 8973 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8974 { 8975 IS localis_t; 8976 PetscInt i,lsize,*idxs,n; 8977 PetscScalar *vals; 8978 8979 PetscFunctionBegin; 8980 /* get indices in local ordering exploiting local to global map */ 8981 PetscCall(ISGetLocalSize(globalis,&lsize)); 8982 PetscCall(PetscMalloc1(lsize,&vals)); 8983 for (i=0;i<lsize;i++) vals[i] = 1.0; 8984 PetscCall(ISGetIndices(globalis,(const PetscInt**)&idxs)); 8985 PetscCall(VecSet(gwork,0.0)); 8986 PetscCall(VecSet(lwork,0.0)); 8987 if (idxs) { /* multilevel guard */ 8988 PetscCall(VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE)); 8989 PetscCall(VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES)); 8990 } 8991 PetscCall(VecAssemblyBegin(gwork)); 8992 PetscCall(ISRestoreIndices(globalis,(const PetscInt**)&idxs)); 8993 PetscCall(PetscFree(vals)); 8994 PetscCall(VecAssemblyEnd(gwork)); 8995 /* now compute set in local ordering */ 8996 PetscCall(VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD)); 8997 PetscCall(VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD)); 8998 PetscCall(VecGetArrayRead(lwork,(const PetscScalar**)&vals)); 8999 PetscCall(VecGetSize(lwork,&n)); 9000 for (i=0,lsize=0;i<n;i++) { 9001 if (PetscRealPart(vals[i]) > 0.5) { 9002 lsize++; 9003 } 9004 } 9005 PetscCall(PetscMalloc1(lsize,&idxs)); 9006 for (i=0,lsize=0;i<n;i++) { 9007 if (PetscRealPart(vals[i]) > 0.5) { 9008 idxs[lsize++] = i; 9009 } 9010 } 9011 PetscCall(VecRestoreArrayRead(lwork,(const PetscScalar**)&vals)); 9012 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t)); 9013 *localis = localis_t; 9014 PetscFunctionReturn(0); 9015 } 9016 9017 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9018 { 9019 PC_IS *pcis=(PC_IS*)pc->data; 9020 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9021 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 9022 Mat S_j; 9023 PetscInt *used_xadj,*used_adjncy; 9024 PetscBool free_used_adj; 9025 PetscErrorCode ierr; 9026 9027 PetscFunctionBegin; 9028 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0)); 9029 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9030 free_used_adj = PETSC_FALSE; 9031 if (pcbddc->sub_schurs_layers == -1) { 9032 used_xadj = NULL; 9033 used_adjncy = NULL; 9034 } else { 9035 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9036 used_xadj = pcbddc->mat_graph->xadj; 9037 used_adjncy = pcbddc->mat_graph->adjncy; 9038 } else if (pcbddc->computed_rowadj) { 9039 used_xadj = pcbddc->mat_graph->xadj; 9040 used_adjncy = pcbddc->mat_graph->adjncy; 9041 } else { 9042 PetscBool flg_row=PETSC_FALSE; 9043 const PetscInt *xadj,*adjncy; 9044 PetscInt nvtxs; 9045 9046 PetscCall(MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row)); 9047 if (flg_row) { 9048 PetscCall(PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy)); 9049 PetscCall(PetscArraycpy(used_xadj,xadj,nvtxs+1)); 9050 PetscCall(PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs])); 9051 free_used_adj = PETSC_TRUE; 9052 } else { 9053 pcbddc->sub_schurs_layers = -1; 9054 used_xadj = NULL; 9055 used_adjncy = NULL; 9056 } 9057 PetscCall(MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row)); 9058 } 9059 } 9060 9061 /* setup sub_schurs data */ 9062 PetscCall(MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j)); 9063 if (!sub_schurs->schur_explicit) { 9064 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9065 PetscCall(MatSchurComplementSetKSP(S_j,pcbddc->ksp_D)); 9066 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL)); 9067 } else { 9068 Mat change = NULL; 9069 Vec scaling = NULL; 9070 IS change_primal = NULL, iP; 9071 PetscInt benign_n; 9072 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9073 PetscBool need_change = PETSC_FALSE; 9074 PetscBool discrete_harmonic = PETSC_FALSE; 9075 9076 if (!pcbddc->use_vertices && reuse_solvers) { 9077 PetscInt n_vertices; 9078 9079 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_vertices)); 9080 reuse_solvers = (PetscBool)!n_vertices; 9081 } 9082 if (!pcbddc->benign_change_explicit) { 9083 benign_n = pcbddc->benign_n; 9084 } else { 9085 benign_n = 0; 9086 } 9087 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9088 We need a global reduction to avoid possible deadlocks. 9089 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9090 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9091 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9092 PetscCallMPI(MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 9093 need_change = (PetscBool)(!need_change); 9094 } 9095 /* If the user defines additional constraints, we import them here. 9096 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 9097 if (need_change) { 9098 PC_IS *pcisf; 9099 PC_BDDC *pcbddcf; 9100 PC pcf; 9101 9102 PetscCheck(!pcbddc->sub_schurs_rebuild,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 9103 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc),&pcf)); 9104 PetscCall(PCSetOperators(pcf,pc->mat,pc->pmat)); 9105 PetscCall(PCSetType(pcf,PCBDDC)); 9106 9107 /* hacks */ 9108 pcisf = (PC_IS*)pcf->data; 9109 pcisf->is_B_local = pcis->is_B_local; 9110 pcisf->vec1_N = pcis->vec1_N; 9111 pcisf->BtoNmap = pcis->BtoNmap; 9112 pcisf->n = pcis->n; 9113 pcisf->n_B = pcis->n_B; 9114 pcbddcf = (PC_BDDC*)pcf->data; 9115 PetscCall(PetscFree(pcbddcf->mat_graph)); 9116 pcbddcf->mat_graph = pcbddc->mat_graph; 9117 pcbddcf->use_faces = PETSC_TRUE; 9118 pcbddcf->use_change_of_basis = PETSC_TRUE; 9119 pcbddcf->use_change_on_faces = PETSC_TRUE; 9120 pcbddcf->use_qr_single = PETSC_TRUE; 9121 pcbddcf->fake_change = PETSC_TRUE; 9122 9123 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 9124 PetscCall(PCBDDCConstraintsSetUp(pcf)); 9125 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 9126 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal)); 9127 change = pcbddcf->ConstraintMatrix; 9128 pcbddcf->ConstraintMatrix = NULL; 9129 9130 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 9131 PetscCall(PetscFree(pcbddcf->sub_schurs)); 9132 PetscCall(MatNullSpaceDestroy(&pcbddcf->onearnullspace)); 9133 PetscCall(PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult)); 9134 PetscCall(PetscFree(pcbddcf->primal_indices_local_idxs)); 9135 PetscCall(PetscFree(pcbddcf->onearnullvecs_state)); 9136 PetscCall(PetscFree(pcf->data)); 9137 pcf->ops->destroy = NULL; 9138 pcf->ops->reset = NULL; 9139 PetscCall(PCDestroy(&pcf)); 9140 } 9141 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9142 9143 PetscCall(PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP)); 9144 if (iP) { 9145 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");PetscCall(ierr); 9146 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL)); 9147 ierr = PetscOptionsEnd();PetscCall(ierr); 9148 } 9149 if (discrete_harmonic) { 9150 Mat A; 9151 PetscCall(MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A)); 9152 PetscCall(MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL)); 9153 PetscCall(PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP)); 9154 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal)); 9155 PetscCall(MatDestroy(&A)); 9156 } else { 9157 PetscCall(PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal)); 9158 } 9159 PetscCall(MatDestroy(&change)); 9160 PetscCall(ISDestroy(&change_primal)); 9161 } 9162 PetscCall(MatDestroy(&S_j)); 9163 9164 /* free adjacency */ 9165 if (free_used_adj) { 9166 PetscCall(PetscFree2(used_xadj,used_adjncy)); 9167 } 9168 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0)); 9169 PetscFunctionReturn(0); 9170 } 9171 9172 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9173 { 9174 PC_IS *pcis=(PC_IS*)pc->data; 9175 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9176 PCBDDCGraph graph; 9177 9178 PetscFunctionBegin; 9179 /* attach interface graph for determining subsets */ 9180 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9181 IS verticesIS,verticescomm; 9182 PetscInt vsize,*idxs; 9183 9184 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS)); 9185 PetscCall(ISGetSize(verticesIS,&vsize)); 9186 PetscCall(ISGetIndices(verticesIS,(const PetscInt**)&idxs)); 9187 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm)); 9188 PetscCall(ISRestoreIndices(verticesIS,(const PetscInt**)&idxs)); 9189 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS)); 9190 PetscCall(PCBDDCGraphCreate(&graph)); 9191 PetscCall(PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount)); 9192 PetscCall(PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm)); 9193 PetscCall(ISDestroy(&verticescomm)); 9194 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 9195 } else { 9196 graph = pcbddc->mat_graph; 9197 } 9198 /* print some info */ 9199 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9200 IS vertices; 9201 PetscInt nv,nedges,nfaces; 9202 PetscCall(PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer)); 9203 PetscCall(PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices)); 9204 PetscCall(ISGetSize(vertices,&nv)); 9205 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 9206 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n")); 9207 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices)); 9208 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges)); 9209 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces)); 9210 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9211 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 9212 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices)); 9213 } 9214 9215 /* sub_schurs init */ 9216 if (!pcbddc->sub_schurs) { 9217 PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 9218 } 9219 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild)); 9220 9221 /* free graph struct */ 9222 if (pcbddc->sub_schurs_rebuild) { 9223 PetscCall(PCBDDCGraphDestroy(&graph)); 9224 } 9225 PetscFunctionReturn(0); 9226 } 9227 9228 PetscErrorCode PCBDDCCheckOperator(PC pc) 9229 { 9230 PC_IS *pcis=(PC_IS*)pc->data; 9231 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 9232 9233 PetscFunctionBegin; 9234 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 9235 IS zerodiag = NULL; 9236 Mat S_j,B0_B=NULL; 9237 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 9238 PetscScalar *p0_check,*array,*array2; 9239 PetscReal norm; 9240 PetscInt i; 9241 9242 /* B0 and B0_B */ 9243 if (zerodiag) { 9244 IS dummy; 9245 9246 PetscCall(ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy)); 9247 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B)); 9248 PetscCall(MatCreateVecs(B0_B,NULL,&dummy_vec)); 9249 PetscCall(ISDestroy(&dummy)); 9250 } 9251 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 9252 PetscCall(VecDuplicate(pcbddc->vec1_P,&vec_scale_P)); 9253 PetscCall(VecSet(pcbddc->vec1_P,1.0)); 9254 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9255 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9256 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE)); 9257 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE)); 9258 PetscCall(VecReciprocal(vec_scale_P)); 9259 /* S_j */ 9260 PetscCall(MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j)); 9261 PetscCall(MatSchurComplementSetKSP(S_j,pcbddc->ksp_D)); 9262 9263 /* mimic vector in \widetilde{W}_\Gamma */ 9264 PetscCall(VecSetRandom(pcis->vec1_N,NULL)); 9265 /* continuous in primal space */ 9266 PetscCall(VecSetRandom(pcbddc->coarse_vec,NULL)); 9267 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9268 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9269 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 9270 PetscCall(PetscCalloc1(pcbddc->benign_n,&p0_check)); 9271 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 9272 PetscCall(VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES)); 9273 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 9274 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 9275 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 9276 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD)); 9277 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD)); 9278 PetscCall(VecDuplicate(pcis->vec2_B,&vec_check_B)); 9279 PetscCall(VecCopy(pcis->vec2_B,vec_check_B)); 9280 9281 /* assemble rhs for coarse problem */ 9282 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 9283 /* local with Schur */ 9284 PetscCall(MatMult(S_j,pcis->vec2_B,pcis->vec1_B)); 9285 if (zerodiag) { 9286 PetscCall(VecGetArray(dummy_vec,&array)); 9287 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 9288 PetscCall(VecRestoreArray(dummy_vec,&array)); 9289 PetscCall(MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B)); 9290 } 9291 /* sum on primal nodes the local contributions */ 9292 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE)); 9293 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE)); 9294 PetscCall(VecGetArray(pcis->vec1_N,&array)); 9295 PetscCall(VecGetArray(pcbddc->vec1_P,&array2)); 9296 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 9297 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array2)); 9298 PetscCall(VecRestoreArray(pcis->vec1_N,&array)); 9299 PetscCall(VecSet(pcbddc->coarse_vec,0.)); 9300 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9301 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD)); 9302 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9303 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE)); 9304 PetscCall(VecGetArray(pcbddc->vec1_P,&array)); 9305 /* scale primal nodes (BDDC sums contibutions) */ 9306 PetscCall(VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P)); 9307 PetscCall(VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES)); 9308 PetscCall(VecRestoreArray(pcbddc->vec1_P,&array)); 9309 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 9310 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 9311 PetscCall(VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 9312 PetscCall(VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD)); 9313 /* global: \widetilde{B0}_B w_\Gamma */ 9314 if (zerodiag) { 9315 PetscCall(MatMult(B0_B,pcis->vec2_B,dummy_vec)); 9316 PetscCall(VecGetArray(dummy_vec,&array)); 9317 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 9318 PetscCall(VecRestoreArray(dummy_vec,&array)); 9319 } 9320 /* BDDC */ 9321 PetscCall(VecSet(pcis->vec1_D,0.)); 9322 PetscCall(PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE)); 9323 9324 PetscCall(VecCopy(pcis->vec1_B,pcis->vec2_B)); 9325 PetscCall(VecAXPY(pcis->vec1_B,-1.0,vec_check_B)); 9326 PetscCall(VecNorm(pcis->vec1_B,NORM_INFINITY,&norm)); 9327 PetscCall(PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm)); 9328 for (i=0;i<pcbddc->benign_n;i++) { 9329 PetscCall(PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]))); 9330 } 9331 PetscCall(PetscFree(p0_check)); 9332 PetscCall(VecDestroy(&vec_scale_P)); 9333 PetscCall(VecDestroy(&vec_check_B)); 9334 PetscCall(VecDestroy(&dummy_vec)); 9335 PetscCall(MatDestroy(&S_j)); 9336 PetscCall(MatDestroy(&B0_B)); 9337 } 9338 PetscFunctionReturn(0); 9339 } 9340 9341 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9342 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9343 { 9344 Mat At; 9345 IS rows; 9346 PetscInt rst,ren; 9347 PetscLayout rmap; 9348 9349 PetscFunctionBegin; 9350 rst = ren = 0; 9351 if (ccomm != MPI_COMM_NULL) { 9352 PetscCall(PetscLayoutCreate(ccomm,&rmap)); 9353 PetscCall(PetscLayoutSetSize(rmap,A->rmap->N)); 9354 PetscCall(PetscLayoutSetBlockSize(rmap,1)); 9355 PetscCall(PetscLayoutSetUp(rmap)); 9356 PetscCall(PetscLayoutGetRange(rmap,&rst,&ren)); 9357 } 9358 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows)); 9359 PetscCall(MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At)); 9360 PetscCall(ISDestroy(&rows)); 9361 9362 if (ccomm != MPI_COMM_NULL) { 9363 Mat_MPIAIJ *a,*b; 9364 IS from,to; 9365 Vec gvec; 9366 PetscInt lsize; 9367 9368 PetscCall(MatCreate(ccomm,B)); 9369 PetscCall(MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N)); 9370 PetscCall(MatSetType(*B,MATAIJ)); 9371 PetscCall(PetscLayoutDestroy(&((*B)->rmap))); 9372 PetscCall(PetscLayoutSetUp((*B)->cmap)); 9373 a = (Mat_MPIAIJ*)At->data; 9374 b = (Mat_MPIAIJ*)(*B)->data; 9375 PetscCallMPI(MPI_Comm_size(ccomm,&b->size)); 9376 PetscCallMPI(MPI_Comm_rank(ccomm,&b->rank)); 9377 PetscCall(PetscObjectReference((PetscObject)a->A)); 9378 PetscCall(PetscObjectReference((PetscObject)a->B)); 9379 b->A = a->A; 9380 b->B = a->B; 9381 9382 b->donotstash = a->donotstash; 9383 b->roworiented = a->roworiented; 9384 b->rowindices = NULL; 9385 b->rowvalues = NULL; 9386 b->getrowactive = PETSC_FALSE; 9387 9388 (*B)->rmap = rmap; 9389 (*B)->factortype = A->factortype; 9390 (*B)->assembled = PETSC_TRUE; 9391 (*B)->insertmode = NOT_SET_VALUES; 9392 (*B)->preallocated = PETSC_TRUE; 9393 9394 if (a->colmap) { 9395 #if defined(PETSC_USE_CTABLE) 9396 PetscCall(PetscTableCreateCopy(a->colmap,&b->colmap)); 9397 #else 9398 PetscCall(PetscMalloc1(At->cmap->N,&b->colmap)); 9399 PetscCall(PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt))); 9400 PetscCall(PetscArraycpy(b->colmap,a->colmap,At->cmap->N)); 9401 #endif 9402 } else b->colmap = NULL; 9403 if (a->garray) { 9404 PetscInt len; 9405 len = a->B->cmap->n; 9406 PetscCall(PetscMalloc1(len+1,&b->garray)); 9407 PetscCall(PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt))); 9408 if (len) PetscCall(PetscArraycpy(b->garray,a->garray,len)); 9409 } else b->garray = NULL; 9410 9411 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9412 b->lvec = a->lvec; 9413 PetscCall(PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec)); 9414 9415 /* cannot use VecScatterCopy */ 9416 PetscCall(VecGetLocalSize(b->lvec,&lsize)); 9417 PetscCall(ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from)); 9418 PetscCall(ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to)); 9419 PetscCall(MatCreateVecs(*B,&gvec,NULL)); 9420 PetscCall(VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx)); 9421 PetscCall(PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx)); 9422 PetscCall(ISDestroy(&from)); 9423 PetscCall(ISDestroy(&to)); 9424 PetscCall(VecDestroy(&gvec)); 9425 } 9426 PetscCall(MatDestroy(&At)); 9427 PetscFunctionReturn(0); 9428 } 9429